diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index f49354540e..8e23597a4e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -28,5 +28,5 @@ /test/ttmlir/Dialect/TTNN/optimizer/ @nobradovictt @odjuricicTT /test/ttmlir/Silicon/TTNN/optimizer/ @nobradovictt @odjuricicTT /test/unittests/Optimizer @nobradovictt @odjuricicTT -/tools/explorer/ @odjuricicTT @nobradovictt @vprajapati-tt /tools/ @svuckovicTT @mtopalovicTT +/tools/explorer/ @odjuricicTT @nobradovictt @vprajapati-tt diff --git a/docs/src/adding-an-op.md b/docs/src/adding-an-op.md index 74c713f0c7..cbe4a44c10 100644 --- a/docs/src/adding-an-op.md +++ b/docs/src/adding-an-op.md @@ -53,18 +53,13 @@ There are many things to break down here, starting from the top: be critical for modeling buffer allocation / lifetimes. Note the 3rd argument `AnyRankedTensor:$output`. - Next we have a list of `arguments`. These arguments consist of a mixture of - `Type`s (i.e. `AnyRankedTensor`) and `Attribute`s (i.e. `TT_OperandConstraintArrayAttr`). + `Type`s (i.e. `AnyRankedTensor`) and `Attribute`s. [Read more about Types & Attributes here](https://mlir.llvm.org/docs/DefiningDialects/AttributesAndTypes/#attributes). - `AnyRankedTensor` is part of a tablegen standard library which type aliases to MLIR's builtin Tensor type, with the added constraint that the tensor has a static rank. As much as possible we want to use the builtin types and infrastructure provided by MLIR. - - `TT_OperandConstraintArrayAttr` is a custom attribute that we have defined - in the [`TT`](./autogen/md/Dialect/TTDialect.md) dialect. This attribute is - used to specify constraints on the - operands of the operation. For example, the `TTIR_MatmulOp` requires that - the input tensors be in tile layout, this attribute captures this constraint. - Next we have a list of `results` in this case just 1, which aliases the `output` tensor. One drawback of DPS is that the result tensor and the output tensor will appear to have different SSA names in the IR, but they diff --git a/docs/src/overview.md b/docs/src/overview.md index f2e87fa032..b181b822a2 100644 --- a/docs/src/overview.md +++ b/docs/src/overview.md @@ -104,8 +104,7 @@ module attributes {tt.system_desc = #tt.system_desc<[<#tt.arch, #tt defines the type of result - Quotes are added around ttir.multiply since it's part of a - custom dialect, and more custom assembly instructions are - applied to specify operand_constraints. + custom dialect. 
- Operations typically have operands (arguments) and results which are highlighted with %, these results and operands help to show diff --git a/include/ttmlir-c/TTAttrs.h b/include/ttmlir-c/TTAttrs.h index 263cd1d8e4..6884eb3869 100644 --- a/include/ttmlir-c/TTAttrs.h +++ b/include/ttmlir-c/TTAttrs.h @@ -69,13 +69,6 @@ ttmlirTTIteratorTypeAttrGet(MlirContext ctx, uint32_t iteratorType); MLIR_CAPI_EXPORTED MlirAttribute ttmlirTTIteratorTypeArrayAttrGet( MlirContext ctx, uint32_t *iteratorTypes, size_t iteratorTypesSize); -MLIR_CAPI_EXPORTED MlirAttribute -ttmlirTTOperandConstraintAttrGet(MlirContext ctx, uint32_t OperandConstraint); - -MLIR_CAPI_EXPORTED MlirAttribute ttmlirTTOperandConstraintArrayAttrGet( - MlirContext ctx, uint32_t *OperandConstraints, - size_t OperandConstraintsSize); - MLIR_CAPI_EXPORTED MlirAttribute ttmlirTTTileSizeAttrGet(MlirContext ctx, int64_t y, int64_t x); diff --git a/include/ttmlir/Bindings/Python/TTMLIRModule.h b/include/ttmlir/Bindings/Python/TTMLIRModule.h index d36529e676..49ec0e912b 100644 --- a/include/ttmlir/Bindings/Python/TTMLIRModule.h +++ b/include/ttmlir/Bindings/Python/TTMLIRModule.h @@ -62,6 +62,7 @@ void populateTTNNModule(py::module &m); void populateOverridesModule(py::module &m); void populateOptimizerOverridesModule(py::module &m); void populatePassesModule(py::module &m); +void populateUtilModule(py::module &m); } // namespace mlir::ttmlir::python #endif // TTMLIR_BINDINGS_PYTHON_TTMLIRMODULE_H diff --git a/include/ttmlir/Dialect/TT/IR/TTOps.h b/include/ttmlir/Dialect/TT/IR/TTOps.h index 047fc2a3c4..16219c04e3 100644 --- a/include/ttmlir/Dialect/TT/IR/TTOps.h +++ b/include/ttmlir/Dialect/TT/IR/TTOps.h @@ -9,6 +9,7 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" +#include "mlir/IR/TypeUtilities.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/DestinationStyleOpInterface.h" #include "mlir/Interfaces/InferTypeOpInterface.h" diff --git a/include/ttmlir/Dialect/TT/IR/TTOps.td b/include/ttmlir/Dialect/TT/IR/TTOps.td index d3b34fff88..9078028ab1 100644 --- a/include/ttmlir/Dialect/TT/IR/TTOps.td +++ b/include/ttmlir/Dialect/TT/IR/TTOps.td @@ -6,5 +6,30 @@ #define TTMLIR_TTMLIR_TTOPS_TD include "ttmlir/Dialect/TT/IR/TTOpsTypes.td" +include "mlir/Interfaces/InferTypeOpInterface.td" +include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/IR/CommonTypeConstraints.td" + +def TT_GetTupleElementOp: TT_Op<"get_tuple_element", [Pure, DeclareOpInterfaceMethods<InferTypeOpInterface>]> { + let summary = "GetTupleElement operation"; + let description = [{ + Extracts element at `index` position of the `operand` tuple and produces a `result`.
+ + Example: + ```mlir + %result = tt.get_tuple_element %operand[0] : (tuple<tensor<32x32xbf16>, tuple<tensor<32x32xbf16>>>) -> tensor<32x32xbf16> + ``` + }]; + + let arguments = (ins TT_Tuple:$operand, + ConfinedAttr<I32Attr, [IntNonNegative]>:$index + ); + + let results = (outs TT_TupleReturnType:$result); + + let assemblyFormat = [{ + $operand `[` $index `]` attr-dict `:` functional-type(operands, results) + }]; +} #endif diff --git a/include/ttmlir/Dialect/TT/IR/TTOpsEnums.td b/include/ttmlir/Dialect/TT/IR/TTOpsEnums.td index aee19f63c6..9ffa63dade 100644 --- a/include/ttmlir/Dialect/TT/IR/TTOpsEnums.td +++ b/include/ttmlir/Dialect/TT/IR/TTOpsEnums.td @@ -126,47 +126,6 @@ def TT_OOBVal : I32EnumAttr<"OOBVal", "TT OOBVal", let cppNamespace = "::mlir::tt"; } -def TT_OperandConstraintSystem : I32BitEnumAttrCaseBit<"System", 0, "system">; -def TT_OperandConstraintDRAM : I32BitEnumAttrCaseBit<"DRAM", 1, "dram">; -def TT_OperandConstraintL1 : I32BitEnumAttrCaseBit<"L1", 2, "l1">; -def TT_OperandConstraintScalar : I32BitEnumAttrCaseBit<"Scalar", 3, "scalar">; -def TT_OperandConstraintTile : I32BitEnumAttrCaseBit<"Tile", 4, "tile">; -def TT_OperandConstraintNone : I32BitEnumAttrCaseBit<"None", 5, "none">; -def TT_OperandConstraintInterleaved : I32BitEnumAttrCaseBit<"Interleaved", 6, "interleaved">; -def TT_OperandConstraintSingleBank : I32BitEnumAttrCaseBit<"SingleBank", 7, "single_bank">; -def TT_OperandConstraintHeightSharded : I32BitEnumAttrCaseBit<"HeightSharded", 8, "height_sharded">; -def TT_OperandConstraintWidthSharded : I32BitEnumAttrCaseBit<"WidthSharded", 9, "width_sharded">; -def TT_OperandConstraintBlockSharded : I32BitEnumAttrCaseBit<"BlockSharded", 10, "block_sharded">; -def TT_OperandConstraintSystemScalar : I32BitEnumAttrCaseGroup<"SystemScalar", [TT_OperandConstraintSystem, TT_OperandConstraintScalar], "system_scalar">; -def TT_OperandConstraintAnyLayout : I32BitEnumAttrCaseGroup<"AnyLayout", [TT_OperandConstraintNone, TT_OperandConstraintInterleaved, TT_OperandConstraintSingleBank, TT_OperandConstraintHeightSharded, TT_OperandConstraintWidthSharded, TT_OperandConstraintBlockSharded], "any_layout">; -def TT_OperandConstraintAny : I32BitEnumAttrCaseGroup<"Any", [TT_OperandConstraintSystem, TT_OperandConstraintDRAM, TT_OperandConstraintL1, TT_OperandConstraintScalar, TT_OperandConstraintTile, TT_OperandConstraintAnyLayout], "any">; -def TT_OperandConstraintAnyDevice : I32BitEnumAttrCaseGroup<"AnyDevice", [TT_OperandConstraintDRAM, TT_OperandConstraintL1, TT_OperandConstraintScalar, TT_OperandConstraintTile, TT_OperandConstraintAnyLayout], "any_device">; -def TT_OperandConstraintAnyDeviceTile : I32BitEnumAttrCaseGroup<"AnyDeviceTile", [TT_OperandConstraintDRAM, TT_OperandConstraintL1, TT_OperandConstraintTile, TT_OperandConstraintAnyLayout], "any_device_tile">; -def TT_OperandConstraintL1BlockSharded : I32BitEnumAttrCaseGroup<"L1BlockSharded", [TT_OperandConstraintL1, TT_OperandConstraintScalar, TT_OperandConstraintTile, TT_OperandConstraintBlockSharded], "l1_block_sharded">; -def TT_OperandConstraint : I32BitEnumAttr<"OperandConstraint", "TT Operand Constraints", - [ - TT_OperandConstraintSystem, - TT_OperandConstraintDRAM, - TT_OperandConstraintL1, - TT_OperandConstraintScalar, - TT_OperandConstraintTile, - TT_OperandConstraintNone, - TT_OperandConstraintInterleaved, - TT_OperandConstraintSingleBank, - TT_OperandConstraintHeightSharded, - TT_OperandConstraintWidthSharded, - TT_OperandConstraintBlockSharded, - TT_OperandConstraintSystemScalar, - TT_OperandConstraintAnyLayout, - TT_OperandConstraintAny, - TT_OperandConstraintAnyDevice, -
TT_OperandConstraintAnyDeviceTile, - TT_OperandConstraintL1BlockSharded, - ]> { - let genSpecializedAttr = 0; - let cppNamespace = "::mlir::tt"; -} - def TT_ChipCapabilityPCIE : I32BitEnumAttrCaseBit<"PCIE", 0, "pcie">; def TT_ChipCapabilityHostMMIO : I32BitEnumAttrCaseBit<"HostMMIO", 1, "host_mmio">; diff --git a/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td b/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td index d5dc22e28d..7472c298b7 100644 --- a/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td +++ b/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td @@ -428,12 +428,6 @@ def TT_IteratorTypeAttr : EnumAttr def TT_IteratorTypeArrayAttr : TypedArrayAttrBase; -def TT_OperandConstraintAttr : EnumAttr { - let assemblyFormat = "`<` $value `>`"; -} - -def TT_OperandConstraintArrayAttr : TypedArrayAttrBase; - def TT_ArgumentAllocationAttr : TT_Attr<"ArgumentAllocation", "arg_alloc", []> { let summary = "Argument allocation attribute in TT dialect"; let description = [{ @@ -494,4 +488,12 @@ def TT_Device : TT_Type<"Device", "device", []> { let assemblyFormat = "`<` $desc `>`"; } +//===----------------------------------------------------------------------===// +// Auxiliary type definitions +//===----------------------------------------------------------------------===// + +def TT_Tuple : NestedTupleOf<[AnyRankedTensor]>; + +def TT_TupleReturnType : AnyTypeOf<[AnyRankedTensor]>; + #endif diff --git a/include/ttmlir/Dialect/TT/Utils/OperandConstraints.h b/include/ttmlir/Dialect/TT/Utils/OperandConstraints.h deleted file mode 100644 index b6147102d9..0000000000 --- a/include/ttmlir/Dialect/TT/Utils/OperandConstraints.h +++ /dev/null @@ -1,95 +0,0 @@ -// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef TTMLIR_DIALECT_TT_UTILS_OPERANDCONSTRAINTS_H -#define TTMLIR_DIALECT_TT_UTILS_OPERANDCONSTRAINTS_H - -#include "ttmlir/Dialect/TT/IR/TT.h" -#include "ttmlir/Dialect/TTIR/IR/TTIROps.h" - -namespace mlir::tt { - -inline OperandConstraint -memorySpaceAsOperandConstraint(MemorySpace memorySpace) { - switch (memorySpace) { - case MemorySpace::System: - case MemorySpace::SystemMMIO: - return OperandConstraint::System; - case MemorySpace::DeviceDRAM: - return OperandConstraint::DRAM; - case MemorySpace::DeviceL1: - return OperandConstraint::L1; - } -} - -inline OperandConstraint -memoryLayoutAsOperandConstraint(TensorMemoryLayout memoryLayout) { - switch (memoryLayout) { - case TensorMemoryLayout::None: - return OperandConstraint::None; - case TensorMemoryLayout::Interleaved: - return OperandConstraint::Interleaved; - case TensorMemoryLayout::SingleBank: - return OperandConstraint::SingleBank; - case TensorMemoryLayout::HeightSharded: - return OperandConstraint::HeightSharded; - case TensorMemoryLayout::WidthSharded: - return OperandConstraint::WidthSharded; - case TensorMemoryLayout::BlockSharded: - return OperandConstraint::BlockSharded; - } -} - -inline MemorySpace getLegalMemorySpace(OperandConstraint operandConstraint, - MemorySpace defaultMemorySpace) { - if (bitEnumContainsAny(operandConstraint, - memorySpaceAsOperandConstraint(defaultMemorySpace))) { - return defaultMemorySpace; - } - if (bitEnumContainsAny(operandConstraint, OperandConstraint::DRAM)) { - return MemorySpace::DeviceDRAM; - } - if (bitEnumContainsAny(operandConstraint, OperandConstraint::L1)) { - return MemorySpace::DeviceL1; - } - return MemorySpace::System; -} - -inline TensorMemoryLayout -getLegalTensorMemoryLayout(OperandConstraint operandConstraint, - MemorySpace targetMemorySpace, - 
TensorMemoryLayout defaultDeviceMemLayout) { - if (defaultDeviceMemLayout == TensorMemoryLayout::None) { - return TensorMemoryLayout::None; - } - - if (isSystemMemorySpace(targetMemorySpace)) { - return TensorMemoryLayout::None; - } - - assert(isDeviceMemorySpace(targetMemorySpace)); - if (bitEnumContainsAny(operandConstraint, memoryLayoutAsOperandConstraint( - defaultDeviceMemLayout))) { - return defaultDeviceMemLayout; - } - - std::map validLayoutsMap = { - {OperandConstraint::Interleaved, TensorMemoryLayout::Interleaved}, - {OperandConstraint::SingleBank, TensorMemoryLayout::SingleBank}, - {OperandConstraint::HeightSharded, TensorMemoryLayout::HeightSharded}, - {OperandConstraint::WidthSharded, TensorMemoryLayout::WidthSharded}, - {OperandConstraint::BlockSharded, TensorMemoryLayout::BlockSharded}}; - - for (const auto &[constraintLayout, memLayout] : validLayoutsMap) { - if (bitEnumContainsAny(operandConstraint, constraintLayout)) { - return memLayout; - } - } - - return TensorMemoryLayout::None; -} - -} // namespace mlir::tt - -#endif // TTMLIR_DIALECT_TT_UTILS_OPERANDCONSTRAINTS_H diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td index ff1cc61bed..e685172c24 100644 --- a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td +++ b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td @@ -62,7 +62,6 @@ def TTIR_GenericOp : TTIR_DPSOp<"generic", [AttrSizedOperandSegments]> { TT_GridAttr:$grid, AffineMapArrayAttr:$indexing_maps, TT_IteratorTypeArrayAttr:$iterator_types, - TT_OperandConstraintArrayAttr:$operand_constraints, DefaultValuedOptionalAttr:$operand_cb_mapping); // index of input operand and index of cb go together let results = (outs Variadic:$results); let regions = (region AnyRegion:$region); @@ -126,11 +125,6 @@ def TTIR_ToLayoutOp : TTIR_Op<"to_layout", [DestinationStyleOpInterface, TTIROpI let extraClassDeclaration = [{ MutableOperandRange getDpsInitsMutable() { return getOutputMutable(); } - ArrayAttr getOperandConstraints() { - return nullptr; - // TODO return below, but we need a way to properly create an ArrayAttr: - // return {OperandConstraint::Any, OperandConstraint::Any}; - } struct CompoundComponents { bool isLayoutChange; diff --git a/include/ttmlir/Dialect/TTNN/Analysis/BFInterleavedPolicy.h b/include/ttmlir/Dialect/TTNN/Analysis/BFInterleavedPolicy.h new file mode 100644 index 0000000000..1744a1d415 --- /dev/null +++ b/include/ttmlir/Dialect/TTNN/Analysis/BFInterleavedPolicy.h @@ -0,0 +1,76 @@ +// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef TTMLIR_DIALECT_TTNN_ANALYSIS_BFINTERLEAVEDPOLICY_H +#define TTMLIR_DIALECT_TTNN_ANALYSIS_BFINTERLEAVEDPOLICY_H + +#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h" +#include "ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h" +#include + +namespace mlir::tt::ttnn { + +// The goal of this policy is to always solve simple fork-joins if that is +// possible. Fork-join is considered to be simple if there is no need for DRAM +// spill in its execution. Furthermore, if DRAM spill is necessary, this policy +// will not produce globally optimal solution. +// +class BFInterleavedPolicy : public MemoryLayoutAnalysisPolicy { +public: + // In order to keep track of the L1 memory usage, we have to know two things + // for each op: + // 1. The L1 memory usage of each op's output tensor. + // 2. The number of op's users currently relying on the op's output tensor. 
+ // This is important for fork ops where the output tensor is used by + // multiple other ops. + // + struct OpL1MemUsage { + uint64_t l1MemUsagePerUser; + uint64_t numOfUnscheduledUsers; + }; + +public: + BFInterleavedPolicy( + Operation *rootOp, std::vector &l1ChainConfigs, + const llvm::DenseMap> + &legalLayouts, + llvm::DenseMap> &schedule, + unsigned usableL1CacheSize) + : MemoryLayoutAnalysisPolicy(rootOp, l1ChainConfigs, legalLayouts, + schedule, usableL1CacheSize) {} + + void run() final; + +private: + // Check if the op is analyzable. Op is analyzable if it has at least one + // legal layout. + bool isAnalyzable(Operation *op); + + // Iterate over all operands of the op that satisfy the analyzability + // criterium defined by the isAnalyzable method. This is an abstraction + // for the boilerplate code used in different places within the policy. + // + void walkOnAnalyzableOperands(Operation *op, + function_ref callback); + + // Fetch op's DRAM layout from legalLayouts. + bool hasDRAMBufferType(Operation *op); + TTNNLayoutAttr getDRAMLayout(Operation *op); + + // Fetch op's L1 Interleaved layout from legalLayouts. + bool hasL1BufferType(Operation *op); + TTNNLayoutAttr getL1InterleavedLayout(Operation *op); + + size_t getAvailableL1CacheSize() const { + // Figure out this const based on exec data, but will be replaced + // with API. + // + constexpr float tensorL1UsageCap = 0.75; + return tensorL1UsageCap * usableL1CacheSize; + } +}; + +} // namespace mlir::tt::ttnn + +#endif // TTMLIR_DIALECT_TTNN_ANALYSIS_BFINTERLEAVEDPOLICY_H diff --git a/include/ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h b/include/ttmlir/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.h similarity index 93% rename from include/ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h rename to include/ttmlir/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.h index 2392cd7c9c..b7325332e9 100644 --- a/include/ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h +++ b/include/ttmlir/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.h @@ -2,8 +2,8 @@ // // SPDX-License-Identifier: Apache-2.0 -#ifndef TTMLIR_DIALECT_TTNN_ANALYSIS_L1INTERLEAVEDPOLICY_H -#define TTMLIR_DIALECT_TTNN_ANALYSIS_L1INTERLEAVEDPOLICY_H +#ifndef TTMLIR_DIALECT_TTNN_ANALYSIS_GREEDYL1INTERLEAVEDPOLICY_H +#define TTMLIR_DIALECT_TTNN_ANALYSIS_GREEDYL1INTERLEAVEDPOLICY_H #include "mlir/Dialect/Func/IR/FuncOps.h" #include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h" @@ -12,7 +12,7 @@ namespace mlir::tt::ttnn { -class L1InterleavedPolicy : public MemoryLayoutAnalysisPolicy { +class GreedyL1InterleavedPolicy : public MemoryLayoutAnalysisPolicy { public: struct OpMemSpec { TTNNLayoutAttr layout; @@ -46,7 +46,7 @@ class L1InterleavedPolicy : public MemoryLayoutAnalysisPolicy { }; public: - L1InterleavedPolicy( + GreedyL1InterleavedPolicy( Operation *rootOp, std::vector &l1ChainConfigs, const llvm::DenseMap> &legalLayouts, @@ -124,4 +124,4 @@ class L1InterleavedPolicy : public MemoryLayoutAnalysisPolicy { } // namespace mlir::tt::ttnn -#endif // TTMLIR_DIALECT_TTNN_ANALYSIS_L1INTERLEAVEDPOLICY_H +#endif // TTMLIR_DIALECT_TTNN_ANALYSIS_GREEDYL1INTERLEAVEDPOLICY_H diff --git a/include/ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h b/include/ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h index b8aee2e4ea..32ab07a958 100644 --- a/include/ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h +++ b/include/ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h @@ -5,9 +5,7 @@ #ifndef TTMLIR_DIALECT_TTNN_ANALYSIS_L1CHAINCONFIG_H #define TTMLIR_DIALECT_TTNN_ANALYSIS_L1CHAINCONFIG_H 
-#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h" #include "ttmlir/Dialect/TTNN/Analysis/ShardSolver.h" -#include namespace mlir::tt::ttnn { @@ -19,7 +17,7 @@ struct OpL1MemSpec { // Tensor split factor for the output tensor of the op(working with a partial // tensor). // - uint tensorSplitFactor; + uint tensorSplitFactor = 1; // Layout of the output tensor of the op. // @@ -56,6 +54,7 @@ class L1ChainConfig { void complete(const llvm::DenseMap &selectedOpLayout, std::unordered_set &memReconfigEdges); + void complete(); bool isEmpty() { return opL1MemSpecs.empty(); } void addOpL1MemSpec(OpL1MemSpec spec) { @@ -70,6 +69,9 @@ class L1ChainConfig { const std::unordered_set &getMemReconfigEdges() const { return memReconfigEdges; } + + uint64_t size() const { return opL1MemSpecs.size(); } + void merge(L1ChainConfig &other); }; } // namespace mlir::tt::ttnn diff --git a/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h b/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h index 4f6fcd8f47..33477a798a 100644 --- a/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h +++ b/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h @@ -17,6 +17,7 @@ class MemoryLayoutAnalysisPolicy { llvm::DenseMap> legalLayouts; llvm::DenseMap> *schedule; unsigned usableL1CacheSize = 0; + DeviceAttr deviceAttr; public: virtual ~MemoryLayoutAnalysisPolicy() {}; diff --git a/include/ttmlir/Dialect/TTNN/Transforms/Passes.td b/include/ttmlir/Dialect/TTNN/Transforms/Passes.td index 99a9bed24f..4597db87e1 100644 --- a/include/ttmlir/Dialect/TTNN/Transforms/Passes.td +++ b/include/ttmlir/Dialect/TTNN/Transforms/Passes.td @@ -86,4 +86,36 @@ def TTNNCreateInputGenerators: Pass<"ttnn-create-input-gens", "::mlir::ModuleOp" }]; } +def TTNNModifySignaturesForDylib: Pass<"ttnn-modify-signatures-for-dylib", "::mlir::ModuleOp"> { + let summary = "Modify signatures of the functions for dylib path."; + let description = [{ + This pass is intended to be used only when the end result is a dylib! + + It modifies signatures of forward functions so that they take a canonical + form. Essentially, input tensors are packed into a tuple and then accessed + in the function body. This allows for easier interfacing with the generated + dylib as the signatures are then uniform across all forward functions. 
+ + Given a forward function like this: + + ```mlir + func.func @add(%arg0: tensor<32x32xbf16>, %arg1: tensor<32x32xbf16>) -> tensor<32x32xbf16> { + %0 = "ttnn.add"(%arg0, %arg1) : (tensor<32x32xbf16>, tensor<32x32xbf16>) -> tensor<32x32xbf16> + return %0 : tensor<32x32xbf16> + } + ``` + + The pass will modify the signature and prepend unpacking ops like so: + + ```mlir + func.func @add(%arg0: tuple<tensor<32x32xbf16>, tensor<32x32xbf16>>) -> tensor<32x32xbf16> { + %0 = tt.get_tuple_element %arg0[0] : (tuple<tensor<32x32xbf16>, tensor<32x32xbf16>>) -> tensor<32x32xbf16> + %1 = tt.get_tuple_element %arg0[1] : (tuple<tensor<32x32xbf16>, tensor<32x32xbf16>>) -> tensor<32x32xbf16> + %2 = "ttnn.add"(%0, %1) : (tensor<32x32xbf16>, tensor<32x32xbf16>) -> tensor<32x32xbf16> + return %2 : tensor<32x32xbf16> + } + ``` + }]; +} + #endif diff --git a/include/ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h b/include/ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h index 5275e2340d..945afda812 100644 --- a/include/ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h +++ b/include/ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h @@ -10,7 +10,11 @@ namespace mlir::tt { -enum class MemoryLayoutAnalysisPolicyType { DFSharding, L1Interleaved }; +enum class MemoryLayoutAnalysisPolicyType { + DFSharding, + GreedyL1Interleaved, + BFInterleaved +}; struct MemoryLayoutAnalysisPolicyTypeParser : public llvm::cl::parser<MemoryLayoutAnalysisPolicyType> { @@ -22,8 +26,10 @@ struct MemoryLayoutAnalysisPolicyTypeParser llvm::StringRef arg, MemoryLayoutAnalysisPolicyType &value) { value = llvm::StringSwitch<MemoryLayoutAnalysisPolicyType>(arg) .Case("DFSharding", MemoryLayoutAnalysisPolicyType::DFSharding) - .Case("L1Interleaved", - MemoryLayoutAnalysisPolicyType::L1Interleaved); + .Case("GreedyL1Interleaved", + MemoryLayoutAnalysisPolicyType::GreedyL1Interleaved) + .Case("BFInterleaved", + MemoryLayoutAnalysisPolicyType::BFInterleaved); return false; } @@ -33,8 +39,11 @@ struct MemoryLayoutAnalysisPolicyTypeParser case MemoryLayoutAnalysisPolicyType::DFSharding: res += "DFSharding"; break; - case MemoryLayoutAnalysisPolicyType::L1Interleaved: - res += "L1Interleaved"; + case MemoryLayoutAnalysisPolicyType::GreedyL1Interleaved: + res += "GreedyL1Interleaved"; + break; + case MemoryLayoutAnalysisPolicyType::BFInterleaved: + res += "BFInterleaved"; break; } return res; diff --git a/include/ttmlir/Dialect/TTNN/Utils/Utils.h b/include/ttmlir/Dialect/TTNN/Utils/Utils.h index f214fa793d..d3fb76bda9 100644 --- a/include/ttmlir/Dialect/TTNN/Utils/Utils.h +++ b/include/ttmlir/Dialect/TTNN/Utils/Utils.h @@ -43,6 +43,12 @@ RankedTensorType createRankedTensorTypeWithEncoding(RankedTensorType tensorType, ttnn::TTNNLayoutAttr encoding); +// Return the L1 memory usage of the output tensor of the given op. +// Used within L1 interleaved policies. +// +uint64_t getOpOutputL1Usage(Operation *op, TTNNLayoutAttr opLayout, + DeviceAttr &deviceAttr); + } // namespace mlir::tt::ttnn::utils #endif // TTMLIR_DIALECT_TTNN_UTILS_UTILS_H diff --git a/include/ttmlir/Utils.h b/include/ttmlir/Utils.h index 49dad79e5e..ec7838b2f3 100644 --- a/include/ttmlir/Utils.h +++ b/include/ttmlir/Utils.h @@ -92,8 +92,7 @@ std::string join(const llvm::SmallVector &vec, // This util function can be used as a helper to create an attribute from an // array of attributes for any type defined like for example: // -// `def TT_OperandConstraintArrayAttr // : TypedArrayAttrBase;` +// def TT_IteratorTypeArrayAttr : TypedArrayAttrBase; // // since these don't get any special Cpp class generated for them from // tablegen.
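As an aside on the helper referenced in the comment above, here is a minimal sketch of how such a typed array attribute might be assembled through the C API. The helper's signature is assumed from its call site in `python/TTModule.cpp` (removed later in this diff), and the iterator-type values are placeholders, not values taken from this change:

```cpp
#include "ttmlir-c/TTAttrs.h"
#include "ttmlir/Utils.h"

#include <vector>

// Hypothetical illustration only: build an ArrayAttr of iterator-type
// attributes, since TypedArrayAttrBase definitions get no dedicated C++ class
// from tablegen. The wrapArrayOfMlirAttributesAsAttribute signature is assumed
// from its (removed) use in python/TTModule.cpp; 0/1 are placeholder values.
MlirAttribute buildIteratorTypeArrayAttr(MlirContext ctx) {
  std::vector<MlirAttribute> iteratorTypes;
  iteratorTypes.push_back(ttmlirTTIteratorTypeAttrGet(ctx, /*iteratorType=*/0));
  iteratorTypes.push_back(ttmlirTTIteratorTypeAttrGet(ctx, /*iteratorType=*/1));
  return ::ttmlir::utils::wrapArrayOfMlirAttributesAsAttribute(ctx,
                                                               iteratorTypes);
}
```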
diff --git a/lib/CAPI/TTAttrs.cpp b/lib/CAPI/TTAttrs.cpp index c329f41d56..ae2bb49d55 100644 --- a/lib/CAPI/TTAttrs.cpp +++ b/lib/CAPI/TTAttrs.cpp @@ -167,28 +167,6 @@ MlirAttribute ttmlirTTIteratorTypeArrayAttrGet(MlirContext ctx, return wrap(ArrayAttr::get(unwrap(ctx), iteratorTypesArray)); } -MlirAttribute ttmlirTTOperandConstraintAttrGet(MlirContext ctx, - uint32_t operandConstraint) { - return wrap(OperandConstraintAttr::get( - unwrap(ctx), static_cast(operandConstraint))); -} - -MlirAttribute -ttmlirTTOperandConstraintArrayAttrGet(MlirContext ctx, - uint32_t *operandConstraints, - size_t operandConstraintsSize) { - std::vector operandConstraintsEnumArray( - operandConstraints, operandConstraints + operandConstraintsSize); - std::vector operandConstraintsArray; - - for (auto operandEnum : operandConstraintsEnumArray) { - operandConstraintsArray.push_back(OperandConstraintAttr::get( - unwrap(ctx), static_cast(operandEnum))); - } - - return wrap(ArrayAttr::get(unwrap(ctx), operandConstraintsArray)); -} - MlirAttribute ttmlirTTTileSizeAttrGet(MlirContext ctx, int64_t y, int64_t x) { return wrap(TileSizeAttr::get(unwrap(ctx), y, x)); } diff --git a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp index b1d438b905..c1a07b5fcd 100644 --- a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp +++ b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp @@ -5,6 +5,7 @@ #include "ttmlir/Conversion/TTNNToEmitC/TTNNToEmitC.h" #include "ttmlir/Conversion/TTNNToEmitC/Utils.h" +#include "ttmlir/Dialect/TT/IR/TTOps.h" #include "ttmlir/Dialect/TT/IR/TTOpsDialect.h.inc" #include "ttmlir/Dialect/TTNN/IR/TTNN.h" #include "ttmlir/Dialect/TTNN/IR/TTNNOps.h" @@ -576,6 +577,42 @@ class ArithConstantOpConversionPattern } }; +class GetTupleElementOpConversionPattern + : public OpConversionPattern { + +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(tt::GetTupleElementOp getTupleElementOp, + tt::GetTupleElementOp::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + // SubscriptOp requires a Value object as index, which is created by + // invoking the emitc::LiteralOp + // + Value indexAsVal = rewriter.create( + getTupleElementOp->getLoc(), rewriter.getIndexType(), + std::to_string(adaptor.getIndex())); + + // SubscriptOp also returns an emitc::LValueType, so we wrap the OpaqueType + // with LValueType + // + emitc::LValueType lvalueReturnType = emitc::LValueType::get( + emitc::OpaqueType::get(rewriter.getContext(), "ttnn::Tensor")); + Value subscript = rewriter.create( + getTupleElementOp->getLoc(), lvalueReturnType, adaptor.getOperand(), + indexAsVal); + + // As SubscriptOp returns an LValueType, we need to convert it to an + // OpaqueType - this is done by invoking the emitc::LoadOp + // + rewriter.replaceOpWithNewOp( + getTupleElementOp, emitc::OpaqueType::get(getContext(), "ttnn::Tensor"), + subscript); + return success(); + } +}; + // Module Op conversion pattern // // This conversion pattern removes attributes from the ModuleOp. 
Previously, @@ -724,10 +761,6 @@ void populateTTNNToEmitCPatterns(mlir::MLIRContext *ctx, patterns.add>(typeConverter, ctx); - // Module op - // - patterns.add(typeConverter, ctx); - // KV Cache ops // patterns.add>(typeConverter, @@ -738,6 +771,14 @@ void populateTTNNToEmitCPatterns(mlir::MLIRContext *ctx, // Arith ops // patterns.add(typeConverter, ctx); + + // Module op + // + patterns.add(typeConverter, ctx); + + // Tuple ops + // + patterns.add(typeConverter, ctx); } } // namespace mlir::tt diff --git a/lib/Conversion/TTNNToEmitC/TTNNToEmitCPass.cpp b/lib/Conversion/TTNNToEmitC/TTNNToEmitCPass.cpp index bd0c9044fc..95e722d846 100644 --- a/lib/Conversion/TTNNToEmitC/TTNNToEmitCPass.cpp +++ b/lib/Conversion/TTNNToEmitC/TTNNToEmitCPass.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Func/Transforms/FuncConversions.h" #include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/MLIRContext.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/DialectConversion.h" @@ -40,6 +41,9 @@ class TTNNToEmitCTypeConverter : public TypeConverter { addConversion([ctx](mlir::TensorType type) -> emitc::OpaqueType { return emitc::OpaqueType::get(ctx, "ttnn::Tensor"); }); + addConversion([ctx](mlir::TupleType type) -> emitc::OpaqueType { + return emitc::OpaqueType::get(ctx, "std::vector"); + }); } }; diff --git a/lib/Dialect/LLVM/Transforms/EmitHelperFuncs.cpp b/lib/Dialect/LLVM/Transforms/EmitHelperFuncs.cpp index cd54986f65..19cdaf8e98 100644 --- a/lib/Dialect/LLVM/Transforms/EmitHelperFuncs.cpp +++ b/lib/Dialect/LLVM/Transforms/EmitHelperFuncs.cpp @@ -41,7 +41,8 @@ void generateLLVMHelpersForArgRanks(mlir::ModuleOp moduleOp) { builder.setInsertionPointToEnd(moduleOp.getBody()); // Define the helper function name and type - std::string helperName = func.getName().str() + "_helper"; + llvm::SmallString<32> helperName(func.getName()); + helperName.append("_helper"); // Create the helper function auto helperFuncType = LLVM::LLVMFunctionType::get( diff --git a/lib/Dialect/TT/IR/TTDialect.cpp b/lib/Dialect/TT/IR/TTDialect.cpp index 1ac8a22239..764e951432 100644 --- a/lib/Dialect/TT/IR/TTDialect.cpp +++ b/lib/Dialect/TT/IR/TTDialect.cpp @@ -31,21 +31,6 @@ struct TTOpAsmDialectInterface : public OpAsmDialectInterface { os << mlir::cast(attr).getValue(); return AliasResult::OverridableAlias; } - if (llvm::isa(attr)) { - auto value = mlir::cast(attr).getValue(); - if (value == OperandConstraint::Any) { - os << "any"; - } else if (value == OperandConstraint::AnyDevice) { - os << "any_device"; - } else if (value == OperandConstraint::AnyDeviceTile) { - os << "any_device_tile"; - } else if (value == OperandConstraint::L1BlockSharded) { - os << "l1_block_sharded"; - } else { - os << "operand_constraint"; - } - return AliasResult::OverridableAlias; - } if (llvm::isa(attr)) { os << "device"; return AliasResult::OverridableAlias; diff --git a/lib/Dialect/TT/IR/TTOps.cpp b/lib/Dialect/TT/IR/TTOps.cpp index 6f15f813ef..b4f3b951d6 100644 --- a/lib/Dialect/TT/IR/TTOps.cpp +++ b/lib/Dialect/TT/IR/TTOps.cpp @@ -7,3 +7,28 @@ #define GET_OP_CLASSES #include "ttmlir/Dialect/TT/IR/TTOps.cpp.inc" + +namespace mlir::tt { + +LogicalResult GetTupleElementOp::inferReturnTypes( + MLIRContext *, std::optional location, ValueRange operands, + DictionaryAttr attributes, OpaqueProperties properties, RegionRange regions, + SmallVectorImpl &inferredReturnTypes) { + + GetTupleElementOp::Adaptor adaptor(operands, attributes, properties, regions); + + auto 
operandType = dyn_cast(adaptor.getOperand().getType()); + if (!operandType) { + return failure(); + } + if (adaptor.getIndex() >= static_cast(operandType.size())) { + return emitOptionalError(location, "index ", adaptor.getIndex(), + " is out of bounds of operand with size ", + operandType.size()); + } + + inferredReturnTypes.push_back(operandType.getType(adaptor.getIndex())); + return success(); +} + +} // namespace mlir::tt diff --git a/lib/Dialect/TTIR/Transforms/Generic.cpp b/lib/Dialect/TTIR/Transforms/Generic.cpp index 15064ed346..6fc3eb83b3 100644 --- a/lib/Dialect/TTIR/Transforms/Generic.cpp +++ b/lib/Dialect/TTIR/Transforms/Generic.cpp @@ -149,23 +149,6 @@ class TTIRKernelGenericRewriter : public OpRewritePattern { llvm_unreachable("Unsupported kernel kind"); } - static ArrayAttr createOperandConstraints(PatternRewriter &rewriter, - StringRef kind, - mlir::OperandRange operands) { - auto numOperands = operands.size(); - if (kind == "eltwise") { - return rewriter.getArrayAttr(SmallVector( - numOperands, rewriter.getAttr( - OperandConstraint::AnyDevice))); - } - if (kind == "matmul") { - return rewriter.getArrayAttr(SmallVector( - numOperands, rewriter.getAttr( - OperandConstraint::AnyDeviceTile))); - } - llvm_unreachable("Unsupported kernel kind"); - } - LogicalResult matchAndRewrite(KernelOp op, PatternRewriter &rewriter) const final { if (mlir::isa(op.getOperation()->getParentOp())) { @@ -175,12 +158,10 @@ class TTIRKernelGenericRewriter : public OpRewritePattern { // Create a dispatch op auto [indexingMaps, iteratorTypes] = createIndexingMaps(rewriter, op.getKind(), op.getOperands()); - auto constraints = - createOperandConstraints(rewriter, op.getKind(), op.getOperands()); auto dispatch = rewriter.create( op.getLoc(), op.getResults().getTypes(), op.getInputs(), ValueRange() /* cbs */, op.getOutputs(), rewriter.getAttr(), - indexingMaps, iteratorTypes, constraints); + indexingMaps, iteratorTypes); // Create a new basic block for the dispatch op and create block arguments Block *block = rewriter.createBlock(&dispatch.getRegion()); @@ -246,9 +227,6 @@ class TTIRGenericRegionRewriter // Create a generic op. auto [indexingMaps, iteratorTypes] = op.getIndexingMaps(rewriter); - auto constraints = rewriter.getArrayAttr(SmallVector( - op->getNumOperands(), rewriter.getAttr( - OperandConstraint::AnyDeviceTile))); // For testing purposes try getting grid of the resulting tensor and put the // op in the grid. @@ -264,7 +242,7 @@ class TTIRGenericRegionRewriter auto genericOp = rewriter.create( op.getLoc(), op->getResults().getTypes(), dps.getDpsInputs(), ValueRange() /* cbs */, dps.getDpsInits(), gridAttr, indexingMaps, - iteratorTypes, constraints); + iteratorTypes); // Create a new basic block for the generic op and create block arguments. Block *block = rewriter.createBlock(&genericOp.getRegion()); @@ -439,14 +417,8 @@ class TTIRGenericOpCBsRewriter : public OpRewritePattern { SmallVector cbValues; SmallVector operandCBMapping; - SmallVector oldConstraints; - SmallVector cbConstraints; - size_t i = 0; for (auto operand : generic->getOperands()) { - size_t operandIdx = i++; - oldConstraints.push_back(generic.getOperandConstraints()[operandIdx]); - auto ty = mlir::cast(operand.getType()); // Enforcing tiled layout as in kernel we always want to work with tiles. 
@@ -472,42 +444,14 @@ class TTIRGenericOpCBsRewriter : public OpRewritePattern { generic->getLoc(), ty.getShape(), ty.getElementType(), desiredLayout); cbValues.push_back(emptyOp.getResult()); operandCBMapping.push_back(cbValues.size() - 1); - - // Inheriting constraints from the original operand. - // OperandConstraint inherittedConstraint = - // mlir::cast( - // generic.getOperandConstraints()[operandIdx]) - // .getValue(); - // inherittedConstraint = - // bitEnumSet(inherittedConstraint, OperandConstraint::L1); - // inherittedConstraint = - // bitEnumClear(inherittedConstraint, OperandConstraint::DRAM); - // inherittedConstraint = - // bitEnumClear(inherittedConstraint, OperandConstraint::System); - - // Fixing constraint to L1 for the CB operand. - // TODO(rpavlovic) remove or use code above when we decide on the operand - // constraints model. - cbConstraints.push_back( - rewriter.getAttr(OperandConstraint::L1)); } - SmallVector combinedConstraints; - combinedConstraints.append(oldConstraints.begin(), - oldConstraints.begin() + - generic.getInputs().size()); - combinedConstraints.append(cbConstraints.begin(), cbConstraints.end()); - combinedConstraints.append(oldConstraints.begin() + - generic.getInputs().size(), - oldConstraints.end()); - auto newConstraintsArray = rewriter.getArrayAttr(combinedConstraints); - rewriter.setInsertionPointAfter(generic); auto newGenericOp = rewriter.create( generic->getLoc(), generic.getResultTypes(), generic.getInputs(), cbValues, generic.getOutputs(), generic.getGrid(), generic.getIndexingMaps(), generic.getIteratorTypes(), - newConstraintsArray, operandCBMapping); + operandCBMapping); auto &oldRegion = generic.getRegion(); newGenericOp->getRegion(0).takeBody(oldRegion); diff --git a/lib/Dialect/TTIR/Transforms/Layout.cpp b/lib/Dialect/TTIR/Transforms/Layout.cpp index eca9747304..3dc87f03a2 100644 --- a/lib/Dialect/TTIR/Transforms/Layout.cpp +++ b/lib/Dialect/TTIR/Transforms/Layout.cpp @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "ttmlir/Dialect/TT/IR/TT.h" -#include "ttmlir/Dialect/TT/Utils/OperandConstraints.h" +#include "ttmlir/Dialect/TTIR/IR/TTIROps.h" #include "ttmlir/Dialect/TTIR/Transforms/Passes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" diff --git a/lib/Dialect/TTNN/Analysis/BFInterleavedPolicy.cpp b/lib/Dialect/TTNN/Analysis/BFInterleavedPolicy.cpp new file mode 100644 index 0000000000..4d58d7b5aa --- /dev/null +++ b/lib/Dialect/TTNN/Analysis/BFInterleavedPolicy.cpp @@ -0,0 +1,206 @@ +// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +// +// SPDX-License-Identifier: Apache-2.0 + +#include "ttmlir/Dialect/TTNN/Analysis/BFInterleavedPolicy.h" +#include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h" +#include "ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.h" +#include "ttmlir/Dialect/TTNN/Utils/Utils.h" +#include "ttmlir/Scheduler/Scheduler.h" + +namespace mlir::tt::ttnn { + +void BFInterleavedPolicy::run() { + for (Operation &funcOp : rootOp->getRegion(0).getOps()) { + func::FuncOp func = dyn_cast(funcOp); + mlir::tt::scheduler::Scheduler scheduler(&func); + deviceAttr = getCurrentScopeDevice(func); + + // Initialize the policy. 
+ // + llvm::DenseMap currentL1UsagePerOp; + uint64_t currentL1Usage = 0; + l1ChainConfigs->push_back(L1ChainConfig()); + + while (scheduler.hasUnscheduledOps()) { + uint64_t minimalChangeInL1Usage; + Operation *nextOpForScheduling; + BufferType nextOpForSchedulingBufferType; + + nextOpForScheduling = nullptr; + minimalChangeInL1Usage = std::numeric_limits::max(); + for (Operation *op : scheduler.getScheduleableOps()) { + uint64_t deallocOfL1Mem, allocOfL1Mem, changeInL1Usage; + BufferType opBufferType; + + // Calculate the L1 memory usage of the op's operands. + // + deallocOfL1Mem = 0; + walkOnAnalyzableOperands(op, [&](Operation *operandOp) { + if (currentL1UsagePerOp.count(operandOp)) { + deallocOfL1Mem += + (currentL1UsagePerOp[operandOp].numOfUnscheduledUsers == 1) * + currentL1UsagePerOp[operandOp].l1MemUsagePerUser; + } + }); + + // Default setup for all DRAM buffer type ops. + // + allocOfL1Mem = 0; + opBufferType = BufferType::DRAM; + + // Analyse the possibility of scheduling the op with L1 memory layout. + // + if (hasL1BufferType(op)) { + TTNNLayoutAttr layout = getL1InterleavedLayout(op); + uint64_t opOutputL1Usage = + utils::getOpOutputL1Usage(op, layout, deviceAttr); + + if (currentL1Usage + opOutputL1Usage <= getAvailableL1CacheSize()) { + allocOfL1Mem = opOutputL1Usage; + opBufferType = BufferType::L1; + } + } + + // Check if the scheduling of the op is consuming the least amount of L1 + // memory among all the scheduleable ops. + // + changeInL1Usage = allocOfL1Mem - deallocOfL1Mem; + if (changeInL1Usage < minimalChangeInL1Usage) { + nextOpForScheduling = op; + nextOpForSchedulingBufferType = opBufferType; + minimalChangeInL1Usage = changeInL1Usage; + } + } + + // In case we picked the L1 layout for the nextOpForScheduling, we need + // to add the OpL1MemSpec to the L1ChainConfig and update the state of L1 + // memory. + // + if (nextOpForSchedulingBufferType == BufferType::L1) { + + // Construct OpL1MemSpec for the nextOpForScheduling. + // + OpL1MemSpec opL1MemSpec; + opL1MemSpec.op = nextOpForScheduling; + opL1MemSpec.layout = getL1InterleavedLayout(nextOpForScheduling); + l1ChainConfigs->back().addOpL1MemSpec(opL1MemSpec); + + // Update the state of L1 memory by allocating the nextOpForScheduling's + // output tensor in L1 memory. + // + uint64_t numOfUsers = std::distance(nextOpForScheduling->user_begin(), + nextOpForScheduling->user_end()); + currentL1UsagePerOp[nextOpForScheduling].l1MemUsagePerUser = + utils::getOpOutputL1Usage(nextOpForScheduling, opL1MemSpec.layout, + deviceAttr); + currentL1UsagePerOp[nextOpForScheduling].numOfUnscheduledUsers = + numOfUsers; + currentL1Usage += + currentL1UsagePerOp[nextOpForScheduling].l1MemUsagePerUser; + } + + // Update the state of L1 memory. + // + walkOnAnalyzableOperands(nextOpForScheduling, [&](Operation *operandOp) { + if (currentL1UsagePerOp.count(operandOp)) { + currentL1UsagePerOp[operandOp].numOfUnscheduledUsers -= 1; + if (currentL1UsagePerOp[operandOp].numOfUnscheduledUsers == 0) { + // Only once we scheduled all the users of the operandOp, we can + // free its output tensor from L1 memory. + // + currentL1Usage -= currentL1UsagePerOp[operandOp].l1MemUsagePerUser; + currentL1UsagePerOp.erase(operandOp); + } + } + }); + + // Schedule the nextOpForScheduling and update currentL1Usage. 
+ // + scheduler.scheduleOp(nextOpForScheduling); + } + + assert(currentL1Usage == 0); + assert(currentL1UsagePerOp.size() == 0); + + (*schedule)[func] = scheduler.getSchedule(); + + // Build, Resolve and Complete all L1ChainConfigs. + // + for (L1ChainConfig &l1ChainConfig : *l1ChainConfigs) { + l1ChainConfig.build(); + l1ChainConfig.resolve(); + l1ChainConfig.complete(); + } + } +} + +bool BFInterleavedPolicy::isAnalyzable(Operation *op) { + // Skip operations that are not analyzed by the LegalGridAnalysis. + // + if (legalLayouts.count(op) > 0) { + // Skip operations that are filterd out by the MemoryLayoutAnalysis. + // + return legalLayouts[op].size() > 0; + } + return false; +} + +void BFInterleavedPolicy::walkOnAnalyzableOperands( + Operation *op, function_ref callback) { + for (auto operand : op->getOperands()) { + // Skip block arguments (%arg0, %arg1, ...) + // + if (::llvm::isa(operand)) { + continue; + } + + Operation *operandOp = operand.getDefiningOp(); + + // Skip non-analyzable operands. + // + if (isAnalyzable(operandOp)) { + callback(operandOp); + } + } +} + +bool BFInterleavedPolicy::hasDRAMBufferType(Operation *op) { + if (legalLayouts.count(op)) { + return std::find_if(legalLayouts[op].begin(), legalLayouts[op].end(), + [](TTNNLayoutAttr layout) { + return layout.hasDRAMBufferType(); + }) != legalLayouts[op].end(); + } + return false; +} + +TTNNLayoutAttr BFInterleavedPolicy::getDRAMLayout(Operation *op) { + assert(hasDRAMBufferType(op)); + auto dramLayoutIter = std::find_if( + legalLayouts[op].begin(), legalLayouts[op].end(), + [](TTNNLayoutAttr layout) { return layout.hasDRAMBufferType(); }); + return *dramLayoutIter; +} + +bool BFInterleavedPolicy::hasL1BufferType(Operation *op) { + if (legalLayouts.count(op)) { + return std::find_if(legalLayouts[op].begin(), legalLayouts[op].end(), + [](TTNNLayoutAttr layout) { + return layout.hasInterleavedL1TensorMemoryLayout(); + }) != legalLayouts[op].end(); + } + return false; +} + +TTNNLayoutAttr BFInterleavedPolicy::getL1InterleavedLayout(Operation *op) { + assert(hasL1BufferType(op)); + auto l1InterleaveLayoutIter = + std::find_if(legalLayouts[op].begin(), legalLayouts[op].end(), + [](TTNNLayoutAttr layout) { + return layout.hasInterleavedL1TensorMemoryLayout(); + }); + return *l1InterleaveLayoutIter; +} + +} // namespace mlir::tt::ttnn diff --git a/lib/Dialect/TTNN/Analysis/CMakeLists.txt b/lib/Dialect/TTNN/Analysis/CMakeLists.txt index 4db2d78b9c..04cf9c0d22 100644 --- a/lib/Dialect/TTNN/Analysis/CMakeLists.txt +++ b/lib/Dialect/TTNN/Analysis/CMakeLists.txt @@ -4,7 +4,8 @@ add_mlir_dialect_library(MLIRTTNNAnalysis MemoryLayoutAnalysis.cpp L1ChainConfig.cpp DFShardingPolicy.cpp - L1InterleavedPolicy.cpp + GreedyL1InterleavedPolicy.cpp + BFInterleavedPolicy.cpp ShardSolver.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/Dialect/TTNN/Analysis/DFShardingPolicy.cpp b/lib/Dialect/TTNN/Analysis/DFShardingPolicy.cpp index 8d5f22bfc4..bd74c5a00f 100644 --- a/lib/Dialect/TTNN/Analysis/DFShardingPolicy.cpp +++ b/lib/Dialect/TTNN/Analysis/DFShardingPolicy.cpp @@ -10,7 +10,7 @@ namespace mlir::tt::ttnn { void DFShardingPolicy::run() { rootOp->walk([&](func::FuncOp func) { - DeviceAttr deviceAttr = getCurrentScopeDevice(func); + deviceAttr = getCurrentScopeDevice(func); mlir::tt::scheduler::Scheduler scheduler(&func); l1ChainConfigs->push_back(L1ChainConfig()); llvm::SmallVector scheduleableOps; diff --git a/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp b/lib/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.cpp similarity index 85% 
rename from lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp rename to lib/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.cpp index 69a07af168..5606132906 100644 --- a/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp +++ b/lib/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.cpp @@ -2,33 +2,14 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h" +#include "ttmlir/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.h" #include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h" +#include "ttmlir/Dialect/TTNN/Utils/Utils.h" #include "ttmlir/Scheduler/Scheduler.h" namespace mlir::tt::ttnn { -uint64_t getOpOutputL1Usage(Operation *op, TTNNLayoutAttr opLayout, - DeviceAttr &deviceAttr) { - // In case the opLayout is not in L1 memory space, L1 memory usage is 0. - // - if (opLayout.hasDRAMBufferType()) { - return 0; - } - - // L1 memory usage of the ops without output tensors cannot be calculated. - // So far, this is only false for ttnn.get_device op. - // - assert(mlir::isa(op->getResult(0).getType())); - llvm::ArrayRef opOutputTensorShape = - mlir::cast(op->getResult(0).getType()).getShape(); - - uint64_t opL1OutputUsage = - opLayout.getTensorSizeInBytes(opOutputTensorShape, deviceAttr); - return opL1OutputUsage; -} - -L1InterleavedPolicy::OpConfig L1InterleavedPolicy::getGreedyConfig( +GreedyL1InterleavedPolicy::OpConfig GreedyL1InterleavedPolicy::getGreedyConfig( Operation *baseOp, llvm::DenseMap &opsL1Usage) { uint64_t numOfOps, bitIndex, currentMask; uint64_t currentL1Usage, optimalL1Usage; @@ -146,10 +127,10 @@ L1InterleavedPolicy::OpConfig L1InterleavedPolicy::getGreedyConfig( return optimalConfig; } -void L1InterleavedPolicy::run() { +void GreedyL1InterleavedPolicy::run() { for (Operation &funcOp : rootOp->getRegion(0).getOps()) { func::FuncOp func = dyn_cast(funcOp); - DeviceAttr deviceAttr = getCurrentScopeDevice(func); + deviceAttr = getCurrentScopeDevice(func); // Start the policy. 
// @@ -185,8 +166,8 @@ void L1InterleavedPolicy::run() { if (op->hasOneUse() && hasL1BufferType(op)) { L1Usage l1Usage; - l1Usage.outputL1Usage = - getOpOutputL1Usage(op, getL1InterleavedLayout(op), deviceAttr); + l1Usage.outputL1Usage = utils::getOpOutputL1Usage( + op, getL1InterleavedLayout(op), deviceAttr); l1Usage.requiredL1Usage = 0; opsL1Usage[op] = l1Usage; } @@ -211,8 +192,8 @@ void L1InterleavedPolicy::run() { // if (operandOpLayout.hasInterleavedL1TensorMemoryLayout()) { L1Usage l1Usage; - l1Usage.outputL1Usage = - getOpOutputL1Usage(operandOp, operandOpLayout, deviceAttr); + l1Usage.outputL1Usage = utils::getOpOutputL1Usage( + operandOp, operandOpLayout, deviceAttr); l1Usage.requiredL1Usage = OpMemSpecMap[operandOp].requiredL1Usage; opsL1Usage[operandOp] = l1Usage; } @@ -271,14 +252,15 @@ void L1InterleavedPolicy::run() { std::max(intermediateRequiredL1Usage, intermediateL1Usage + OpMemSpecMap[operandOp].requiredL1Usage); - intermediateL1Usage += getOpOutputL1Usage( + intermediateL1Usage += utils::getOpOutputL1Usage( operandOp, OpMemSpecMap[operandOp].layout, deviceAttr); } } - OpMemSpecMap[op].requiredL1Usage = std::max( - intermediateRequiredL1Usage, - intermediateL1Usage + - getOpOutputL1Usage(op, OpMemSpecMap[op].layout, deviceAttr)); + OpMemSpecMap[op].requiredL1Usage = + std::max(intermediateRequiredL1Usage, + intermediateL1Usage + + utils::getOpOutputL1Usage( + op, OpMemSpecMap[op].layout, deviceAttr)); } } } @@ -308,8 +290,8 @@ void L1InterleavedPolicy::run() { } } -bool L1InterleavedPolicy::isAnalyzable(Operation *op) { - // Skip operations that are not analyzed by the LegalLayoutAnalysis. +bool GreedyL1InterleavedPolicy::isAnalyzable(Operation *op) { + // Skip operations that are not analyzed by the LegalGridAnalysis. // if (legalLayouts.count(op) > 0) { // Skip operations that are filterd out by the MemoryLayoutAnalysis. 
@@ -319,14 +301,14 @@ bool L1InterleavedPolicy::isAnalyzable(Operation *op) { return false; } -bool L1InterleavedPolicy::hasDRAMBufferType(Operation *op) { +bool GreedyL1InterleavedPolicy::hasDRAMBufferType(Operation *op) { return std::find_if(legalLayouts[op].begin(), legalLayouts[op].end(), [](TTNNLayoutAttr layout) { return layout.hasDRAMBufferType(); }) != legalLayouts[op].end(); } -TTNNLayoutAttr L1InterleavedPolicy::getDRAMLayout(Operation *op) { +TTNNLayoutAttr GreedyL1InterleavedPolicy::getDRAMLayout(Operation *op) { assert(hasDRAMBufferType(op)); auto dramLayoutIter = std::find_if( legalLayouts[op].begin(), legalLayouts[op].end(), @@ -334,14 +316,15 @@ TTNNLayoutAttr L1InterleavedPolicy::getDRAMLayout(Operation *op) { return *dramLayoutIter; } -bool L1InterleavedPolicy::hasL1BufferType(Operation *op) { +bool GreedyL1InterleavedPolicy::hasL1BufferType(Operation *op) { return std::find_if(legalLayouts[op].begin(), legalLayouts[op].end(), [](TTNNLayoutAttr layout) { return layout.hasInterleavedL1TensorMemoryLayout(); }) != legalLayouts[op].end(); } -TTNNLayoutAttr L1InterleavedPolicy::getL1InterleavedLayout(Operation *op) { +TTNNLayoutAttr +GreedyL1InterleavedPolicy::getL1InterleavedLayout(Operation *op) { assert(hasL1BufferType(op)); auto l1InterleaveLayoutIter = std::find_if(legalLayouts[op].begin(), legalLayouts[op].end(), diff --git a/lib/Dialect/TTNN/Analysis/L1ChainConfig.cpp b/lib/Dialect/TTNN/Analysis/L1ChainConfig.cpp index 534f67a5fe..bf2eb9790a 100644 --- a/lib/Dialect/TTNN/Analysis/L1ChainConfig.cpp +++ b/lib/Dialect/TTNN/Analysis/L1ChainConfig.cpp @@ -50,4 +50,18 @@ void L1ChainConfig::complete( state = L1ChainState::Completed; } +void L1ChainConfig::complete() { + assert(state == L1ChainState::Resolved); + state = L1ChainState::Completed; +} + +void L1ChainConfig::merge(L1ChainConfig &other) { + assert(getState() == other.getState()); + opL1MemSpecs.insert(opL1MemSpecs.end(), other.opL1MemSpecs.begin(), + other.opL1MemSpecs.end()); + l1ChainedOps.insert(other.l1ChainedOps.begin(), other.l1ChainedOps.end()); + memReconfigEdges.insert(other.memReconfigEdges.begin(), + other.memReconfigEdges.end()); +} + } // namespace mlir::tt::ttnn diff --git a/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp b/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp index f3db4ed7bf..ef7e3d1c11 100644 --- a/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp +++ b/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp @@ -3,8 +3,9 @@ // SPDX-License-Identifier: Apache-2.0 #include "ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h" +#include "ttmlir/Dialect/TTNN/Analysis/BFInterleavedPolicy.h" #include "ttmlir/Dialect/TTNN/Analysis/DFShardingPolicy.h" -#include "ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h" +#include "ttmlir/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.h" #include "ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.h" namespace mlir::tt::ttnn { @@ -68,14 +69,22 @@ void MemoryLayoutAnalysis::analysisImplementation() { dfShardingPolicy.run(); break; } - case MemoryLayoutAnalysisPolicyType::L1Interleaved: { - L1InterleavedPolicy l1InterleavedPolicy( + case MemoryLayoutAnalysisPolicyType::GreedyL1Interleaved: { + GreedyL1InterleavedPolicy l1InterleavedPolicy( op, l1ChainConfigs, filterDRAMAndL1Interleaved(analysisInput.legalLayouts), analysisResult.schedule, analysisInput.usableL1CacheSize); l1InterleavedPolicy.run(); break; } + case MemoryLayoutAnalysisPolicyType::BFInterleaved: { + BFInterleavedPolicy bfInterleavedPolicy( + op, l1ChainConfigs, + 
filterDRAMAndL1Interleaved(analysisInput.legalLayouts), + analysisResult.schedule, analysisInput.usableL1CacheSize); + bfInterleavedPolicy.run(); + break; + } } // Copy over default legal layouts. diff --git a/lib/Dialect/TTNN/Transforms/Passes.cpp b/lib/Dialect/TTNN/Transforms/Passes.cpp index 20172f4fd1..f35768d63d 100644 --- a/lib/Dialect/TTNN/Transforms/Passes.cpp +++ b/lib/Dialect/TTNN/Transforms/Passes.cpp @@ -4,6 +4,8 @@ #include "ttmlir/Dialect/TTNN/Transforms/Passes.h" +#include "ttmlir/Dialect/TT/IR/TTOps.h" +#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h" #include "ttmlir/Dialect/TTNN/IR/TTNNOps.h" #include "ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.h" #include "ttmlir/Dialect/TTNN/IR/TTNNOpsTypes.h" @@ -12,6 +14,7 @@ #include "mlir/Analysis/Liveness.h" #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/TypeRange.h" @@ -24,6 +27,7 @@ namespace mlir::tt::ttnn { #define GEN_PASS_DEF_TTNNDEALLOCATE #define GEN_PASS_DEF_TTNNDECOMPOSELAYOUTS #define GEN_PASS_DEF_TTNNCREATEINPUTGENERATORS +#define GEN_PASS_DEF_TTNNMODIFYSIGNATURESFORDYLIB #include "ttmlir/Dialect/TTNN/Transforms/Passes.h.inc" class TTNNDeallocate : public impl::TTNNDeallocateBase { @@ -906,7 +910,7 @@ class TTNNCreateInputGenerators // Block *firstBlock = module.getBody(0); - // Find all the func.func ops in the module + // Find all the func.func ops in the module that are "forward" functions // SmallVector forwardFuncOps; for (mlir::Operation &op : firstBlock->getOperations()) { @@ -1065,4 +1069,96 @@ class TTNNCreateInputGenerators } }; +class TTNNModifySignaturesForDylib + : public impl::TTNNModifySignaturesForDylibBase< + TTNNModifySignaturesForDylib> { + +public: + using impl::TTNNModifySignaturesForDylibBase< + TTNNModifySignaturesForDylib>::TTNNModifySignaturesForDylibBase; + + void runOnOperation() final { + ModuleOp module = getOperation(); + IRRewriter rewriter(&getContext()); + + // Ensure that the module has a single region and a single block within that + // region + assert(module->getRegions().size() == 1); + assert(module->getRegion(0).getBlocks().size() == 1); + + // Get the first block of the region at index 0 + // + Block *firstBlock = module.getBody(0); + + // Find all the func.func ops in the module that are "forward" functions + // + SmallVector forwardFuncOps; + for (mlir::Operation &op : firstBlock->getOperations()) { + if (mlir::func::FuncOp funcOp = dyn_cast(op)) { + + // Skip functions that are called elsewhere in the IR + // + // This will skip utility functions that are used by other functions, + // only top-level "forward" functions should be considered + // + if (!funcOp->getUses().empty()) { + continue; + } + + forwardFuncOps.push_back(funcOp); + } + } + + // Iterate over all the func ops and modify the signatures + // + for (mlir::func::FuncOp forwardFuncOp : forwardFuncOps) { + // Replace the signature of the forward function so that all the tensor + // arguments are packed into a single tuple + // + mlir::FunctionType originalFuncType = forwardFuncOp.getFunctionType(); + assert( + std::all_of(originalFuncType.getInputs().begin(), + originalFuncType.getInputs().end(), + [](Type t) { return mlir::isa(t); }) && + "Expected all inputs must be of type RankedTensorType"); + mlir::TupleType inputTupleType = + mlir::TupleType::get(&getContext(), originalFuncType.getInputs()); + FunctionType tuplifiedFuncType = + 
originalFuncType.clone(inputTupleType, originalFuncType.getResults()); + rewriter.modifyOpInPlace(forwardFuncOp, + [&forwardFuncOp, &tuplifiedFuncType]() { + forwardFuncOp.setType(tuplifiedFuncType); + }); + + // First block of the function (often referred to as "entry block") needs + // its arguments updated as well - the args need to match the containing + // func's arguments; this is implemented here by first inserting the tuple + // as the first argument of the block, inserting GetTupleElementOp ops to + // start of the block in order to unpack tuple elements, and then + // replacing all uses of the original block arguments with the + // GetTupleElementOp results - after this it's finally safe to remove + // original block arguments as they have no live uses anymore + // + Block &entryBlock = forwardFuncOp.getBlocks().front(); + entryBlock.insertArgument(/*index=*/0u, + tuplifiedFuncType.getInputs().front(), + forwardFuncOp.getLoc()); + + rewriter.setInsertionPointToStart(&entryBlock); + for (size_t idx = 0; idx < originalFuncType.getInputs().size(); idx++) { + ::mlir::tt::GetTupleElementOp getTupleElementOp = + rewriter.create( + forwardFuncOp.getLoc(), forwardFuncOp.getArgument(0), idx); + + rewriter.replaceAllUsesWith(entryBlock.getArgument(1 + idx), + getTupleElementOp); + } + + // Erase original arguments + // + entryBlock.eraseArguments(1, originalFuncType.getInputs().size()); + } + } +}; + } // namespace mlir::tt::ttnn diff --git a/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp b/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp index 80b76d6d45..e148b575fb 100644 --- a/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp +++ b/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp @@ -2,7 +2,8 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "ttmlir/Dialect/TT/Utils/OperandConstraints.h" +#include "ttmlir/Dialect/TT/IR/TT.h" +#include "ttmlir/Dialect/TTIR/IR/TTIROps.h" #include "ttmlir/Dialect/TTNN/Transforms/Passes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" diff --git a/lib/Dialect/TTNN/Utils/Utils.cpp b/lib/Dialect/TTNN/Utils/Utils.cpp index 751c714ede..6976dd35f4 100644 --- a/lib/Dialect/TTNN/Utils/Utils.cpp +++ b/lib/Dialect/TTNN/Utils/Utils.cpp @@ -117,4 +117,24 @@ createRankedTensorTypeWithEncoding(RankedTensorType tensorType, tensorType.getElementType(), encoding); } +uint64_t getOpOutputL1Usage(Operation *op, TTNNLayoutAttr opLayout, + DeviceAttr &deviceAttr) { + assert(mlir::isa(op->getResult(0).getType()) && + "L1 memory usage of the ops without output tensors cannot be " + "calculated."); + + // In case the opLayout is not in L1 memory space, L1 memory usage is 0. 
+ // + if (opLayout.hasDRAMBufferType()) { + return 0; + } + + llvm::ArrayRef opOutputTensorShape = + mlir::cast(op->getResult(0).getType()).getShape(); + + uint64_t opL1OutputUsage = + opLayout.getTensorSizeInBytes(opOutputTensorShape, deviceAttr); + return opL1OutputUsage; +} + } // namespace mlir::tt::ttnn::utils diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index cbfc3bf95f..6f3c99345e 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -75,6 +75,12 @@ declare_mlir_python_sources(TTMLIRPythonSources.Passes SOURCES passes.py ) +declare_mlir_python_sources(TTMLIRPythonSources.Util + ROOT_DIR "${TTMLIR_PYTHON_ROOT_DIR}" + ADD_TO_PARENT TTMLIRPythonSources + SOURCES util.py +) + declare_mlir_python_sources(TTMLIRPythonTestInfra.TestInfra ROOT_DIR "${TTMLIR_PYTHON_TEST_INFRA_ROOT_DIR}" ADD_TO_PARENT TTMLIRPythonTestInfra @@ -95,6 +101,7 @@ declare_mlir_python_extension(TTMLIRPythonExtensions.Main Overrides.cpp OptimizerOverrides.cpp Passes.cpp + Util.cpp EMBED_CAPI_LINK_LIBS MLIRCAPITransforms TTMLIRCAPI diff --git a/python/OptimizerOverrides.cpp b/python/OptimizerOverrides.cpp index bd5ce94f43..18806654c7 100644 --- a/python/OptimizerOverrides.cpp +++ b/python/OptimizerOverrides.cpp @@ -71,8 +71,10 @@ void populateOptimizerOverridesModule(py::module &m) { py::enum_( m, "MemoryLayoutAnalysisPolicyType") .value("DFSharding", mlir::tt::MemoryLayoutAnalysisPolicyType::DFSharding) - .value("L1Interleaved", - mlir::tt::MemoryLayoutAnalysisPolicyType::L1Interleaved); + .value("GreedyL1Interleaved", + mlir::tt::MemoryLayoutAnalysisPolicyType::GreedyL1Interleaved) + .value("BFInterleaved", + mlir::tt::MemoryLayoutAnalysisPolicyType::BFInterleaved); py::enum_(m, "BufferType") .value("DRAM", mlir::tt::ttnn::BufferType::DRAM) diff --git a/python/TTMLIRModule.cpp b/python/TTMLIRModule.cpp index 0347da75b5..2805399205 100644 --- a/python/TTMLIRModule.cpp +++ b/python/TTMLIRModule.cpp @@ -43,4 +43,6 @@ PYBIND11_MODULE(_ttmlir, m) { auto optimizer_overrides = m.def_submodule( "optimizer_overrides", "Python-Bound Optimizer Overrides"); mlir::ttmlir::python::populateOptimizerOverridesModule(optimizer_overrides); + auto util = m.def_submodule("util", "Python-Bound Utilities & Helpers"); + mlir::ttmlir::python::populateUtilModule(util); } diff --git a/python/TTModule.cpp b/python/TTModule.cpp index 9b4bad5873..f64630bf19 100644 --- a/python/TTModule.cpp +++ b/python/TTModule.cpp @@ -397,24 +397,6 @@ void populateTTModule(py::module &m) { return static_cast(self.getValue()); }); - tt_attribute_class(m, "OperandConstraintAttr") - .def_static("get", - [](MlirContext ctx, uint32_t operandConstraint) { - return wrap(tt::OperandConstraintAttr::get( - unwrap(ctx), - static_cast(operandConstraint))); - }) - .def_static( - "get", - [](MlirContext ctx, std::vector attributesArray) { - return ::ttmlir::utils::wrapArrayOfMlirAttributesAsAttribute( - ctx, attributesArray); - }) - .def_property_readonly("operand_constraint_as_int", - [](tt::OperandConstraintAttr self) { - return static_cast(self.getValue()); - }); - tt_type_class(m, "DeviceType") .def_static( "get", diff --git a/python/Util.cpp b/python/Util.cpp new file mode 100644 index 0000000000..c562306bc3 --- /dev/null +++ b/python/Util.cpp @@ -0,0 +1,41 @@ +// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#include "ttmlir/Bindings/Python/TTMLIRModule.h" + +namespace mlir::ttmlir::python { + +void populateUtilModule(py::module &m) { + m.def("debug_print_module", [](MlirModule module) { + std::string source; + llvm::raw_string_ostream os(source); + mlir::OpPrintingFlags flags; + flags.enableDebugInfo(); // Enable the debug print + auto *op = unwrap(mlirModuleGetOperation(module)); + op->print(os, flags); + return source; + }); + + m.def("get_loc_name", [](MlirLocation _loc) -> std::string { + mlir::Location loc = unwrap(_loc); + if (mlir::isa(loc)) { + mlir::NameLoc nameLoc = mlir::cast(loc); + return nameLoc.getName().str(); + } + return "-"; + }); + + m.def("get_loc_full", [](MlirLocation _loc) -> std::string { + mlir::Location loc = unwrap(_loc); + if (mlir::isa(loc)) { + mlir::FileLineColLoc fileLoc = mlir::cast(loc); + return fileLoc.getFilename().str() + ":" + + std::to_string(fileLoc.getLine()) + ":" + + std::to_string(fileLoc.getColumn()); + } + return "-"; + }); +} + +} // namespace mlir::ttmlir::python diff --git a/python/test_infra/test_utils.py b/python/test_infra/test_utils.py index 09e86db975..da1957b7f6 100644 --- a/python/test_infra/test_utils.py +++ b/python/test_infra/test_utils.py @@ -4,9 +4,8 @@ import os import inspect -from typing import Callable, Dict, List, Optional +from typing import Callable, List, Optional -import torch from ttmlir.dialects import func from ttmlir.ir import * from ttmlir.passes import ( diff --git a/python/test_infra/ttir_builder.py b/python/test_infra/ttir_builder.py index 9c832d014d..471c07ca7d 100644 --- a/python/test_infra/ttir_builder.py +++ b/python/test_infra/ttir_builder.py @@ -3,12 +3,12 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import inspect +import inspect from dataclasses import dataclass from typing import List, Optional, Union, Tuple, Callable, Dict from ttmlir.ir import * -from ttmlir.dialects import ttir, tt, func, tensor +from ttmlir.dialects import ttir, tt, tensor from ttmlir.passes import create_golden_tensor, DataType import torch @@ -17,7 +17,50 @@ Operand = Union[Value, OpView, Operation] # Convenience alias for shape -Shape = Union[List[int], Tuple[int]] +Shape = Union[List[int], Tuple[int, ...]] + + +def get_loc_of_extra_file_callee(id: int = 0) -> Location: + """When called, this function returns a `Location` referring to first + callee outside the file of the caller of this function. E.G., if a function + in `foo.py` called a function in `bar.py` that then called this function, + the location would be pointing to the call in `foo.py`. + + NOTE: this location is _NOT_ in the form of + {filename}:{line_number}:{col_number}, but instead in the form: + {filename}:{line_number}:id({id}), where id is supplied to this function as + a disambiguator for calls that happen on the same line + + Arguments + --------- + + id : int + An optional variable that defaults to 0 to be appended to the location, + disambiguating calls on the same line. 
+ + Returns + ------- + + A `Location` referring to the first extra file callee of the caller of this function + + """ + + stack = inspect.stack() + + # find the innermost frame outside of this file + caller_filename = stack[1].filename + + while len(stack) > 0 and stack[0].filename == caller_filename: + stack = stack[1:] + + assert ( + len(stack) > 0 + ), "Top of callstack to builder funcs must be outside the caller's file" + + # FIXME: this should be a `Location.file`, but for some reason it causes + # strange decomposition inheritance behaviour that breaks using this as + # a key into the golden map + return Location.name(f"{stack[0].filename}:{str(stack[0].lineno)}:id({str(id)})") @dataclass(frozen=True) @@ -251,40 +294,20 @@ def eltwise_proxy( inputs: List[Operand], ) -> OpView: - # Snoop the location of the first caller outside of this file to - # annotate the MLIR with. NOTE that this location is _NOT_ row:col, but - # instead row:id, where id is a unique id given to all calls to builder - # funcs. See `get_next_global_id` for more details - stack = inspect.stack() - - # find the innermost frame outside of this file - cur_filename = stack[0].filename - - while len(stack) > 0 and stack[0].filename == cur_filename: - stack = stack[1:] - - assert ( - len(stack) > 0 - ), "Top of callstack to builder funcs must be outside this file" + id = self.get_next_global_id() + loc = get_loc_of_extra_file_callee(id=id) with self._ctx, self._loc: output = self.empty(self.get_shape(inputs[0])) - id = self.get_next_global_id() - - op = op_ttir_function( - [self._get_type(output)], - inputs, - [output], - loc=Location.name(str(id)), - ) + op = op_ttir_function([self._get_type(output)], inputs, [output], loc=loc) goldens = [] for input in inputs: goldens.append(self._get_golden_tensor(input)) golden = Golden(op_golden_function(*goldens)) - self.id_golden_map[str(id)] = golden + self.id_golden_map[str(loc)] = golden self._store_golden(op, golden) self._override_golden(output, golden) diff --git a/python/ttmlir/util.py b/python/ttmlir/util.py new file mode 100644 index 0000000000..9bcbfd0856 --- /dev/null +++ b/python/ttmlir/util.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 + +from ._mlir_libs._ttmlir.util import * diff --git a/runtime/tools/python/ttrt/common/golden.py b/runtime/tools/python/ttrt/common/golden.py index 055d4c8249..847942615d 100644 --- a/runtime/tools/python/ttrt/common/golden.py +++ b/runtime/tools/python/ttrt/common/golden.py @@ -117,17 +117,7 @@ def golden_partial_function( print("-----------executing golden comparision-----------") try: - op_debug_str = ttrt.runtime.get_op_debug_str(op_context) - - # find matching golden tensor based on loc in op debug string - match = re.search(r"loc\(([^)]+)\)", op_debug_str) - - if not match: - print(f"debug_str={op_debug_str}") - print("No location found in debug string - skipping golden comparison") - return - - loc = match.group(1).replace('"', "") + loc = ttrt.runtime.get_op_loc_info(op_context) print(f"found location={loc}") op_golden_tensor = binary.get_debug_info_golden(loc) diff --git a/test/python/golden/test_ttir_ops.py b/test/python/golden/test_ttir_ops.py index aa18e10369..e693196f53 100644 --- a/test/python/golden/test_ttir_ops.py +++ b/test/python/golden/test_ttir_ops.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -# RUN: %python %s +# RUN: SYSTEM_DESC_PATH=%system_desc_path% %python %s import inspect diff --git a/test/python/smoketest.py 
b/test/python/smoketest.py index dfc324e381..88d871101e 100644 --- a/test/python/smoketest.py +++ b/test/python/smoketest.py @@ -12,7 +12,6 @@ module = Module.parse( """ - #any_device = #tt.operand_constraint %0 = tensor.empty() : tensor<64x128xf32> %1 = tensor.empty() : tensor<64x128xf32> %2 = tensor.empty() : tensor<64x128xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/binary/divide_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/binary/divide_op.mlir index 9505428888..bd8a698c7f 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/binary/divide_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/binary/divide_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_divice attributes {} { func.func public @test_divide(%arg0: tensor<13x21x3xf32>, %arg1: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.divide %arg0, %arg1 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/binary/multiply_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/binary/multiply_op.mlir index 390fc156b4..63d8773bd7 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/binary/multiply_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/binary/multiply_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_multiply attributes {} { func.func public @test_multiply(%arg0: tensor<13x21x3xf32>, %arg1: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.multiply %arg0, %arg1 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/binary/remainder_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/binary/remainder_op.mlir index bbca3a3f99..1d4a6a20c8 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/binary/remainder_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/binary/remainder_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_remainder attributes {} { func.func public @test_remainder(%arg0: tensor<32x32xf32>, %arg1: tensor<32x32xf32>) -> tensor<32x32xf32> { %0 = stablehlo.remainder %arg0, %arg1 : tensor<32x32xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/binary/subtract_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/binary/subtract_op.mlir index 1be903064c..4003dbf73f 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/binary/subtract_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/binary/subtract_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_subtract attributes {} { func.func public @test_subtract(%arg0: tensor<13x21x3xf32>, %arg1: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.subtract %arg0, %arg1 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/convert_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/convert_op.mlir index 467325566a..ee2862cadd 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/convert_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/convert_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_convert attributes {} { func.func public @test_convert(%arg0: tensor<2x4xf32>) -> 
tensor<2x4xbf16> { %0 = stablehlo.convert %arg0 : (tensor<2x4xf32>) -> tensor<2x4xbf16> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir index 43241ac6f0..f92517600a 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_dnamic_iota attributes {} { func.func public @test_dynamic_iota() -> tensor<1x32x128x128xf32> { // CHECK: %[[C:.*]] = "ttir.arange"[[C:.*]] diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/floor_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/floor_op.mlir index 2df91d6c5e..90e99aca6a 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/floor_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/floor_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_floor attributes {} { func.func public @test_floor(%arg0: tensor<32x32x3xf32>) -> tensor<32x32x3xf32> { %0 = stablehlo.floor %arg0 : tensor<32x32x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/gather_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/gather_op.mlir index e80bb75886..a9b4071edf 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/gather_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/gather_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_gather attributes {} { func.func public @test_gather_0(%operand: tensor<32000x1024xf32>, %start_indices: tensor<1x32xi32>) -> tensor<1x32x1024xf32> { %0 = "stablehlo.gather"(%operand, %start_indices) <{dimension_numbers = #stablehlo.gather, indices_are_sorted = false, slice_sizes = array}> : (tensor<32000x1024xf32>, tensor<1x32xi32>) -> tensor<1x32x1024xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/get_dimension_size_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/get_dimension_size_op.mlir index 5e2a655388..f5df438612 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/get_dimension_size_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/get_dimension_size_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_get_dimension_size attributes {} { func.func public @test_get_dimension_size(%arg0: tensor<13x21x3xf32>) -> tensor { %0 = stablehlo.get_dimension_size %arg0, dim = 1 : (tensor<13x21x3xf32>) -> tensor diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir index 857a621bb0..d7620cd4a6 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_iota attributes {} { func.func public @test_iota() -> tensor<1x32x128x128xf32> { // CHECK: %[[C:.*]] = "ttir.arange"[[C:.*]] diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/isfinite_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/isfinite_op.mlir index bdcef7a959..5f7c3d8359 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/isfinite_op.mlir +++ 
b/test/ttmlir/Conversion/StableHLOToTTIR/isfinite_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_isfinite attributes {} { func.func public @test_isfinite(%arg0: tensor<32x32x3xf32>) -> tensor<32x32x3xi1> { // CHECK: %[[E:.*]] = tensor.empty() : tensor<32x32x3xbf16> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/rsqrt_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/rsqrt_op.mlir index 13ceed52e9..f52bb72bf0 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/rsqrt_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/rsqrt_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_rsqrt attributes {} { func.func public @test_rsqrt(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.rsqrt %arg0 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/slice_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/slice_op.mlir index 5839f38652..c94acfb886 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/slice_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/slice_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_subtract attributes {} { func.func @slice_op(%arg0: tensor<32x64xf32>) -> tensor<8x8xf32> { // CHECK: %[[C:.*]] = tensor.empty[[C:.*]] diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/unary/absolute_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/unary/absolute_op.mlir index dbed57cd3b..1d43609f8f 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/unary/absolute_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/unary/absolute_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_abs attributes {} { func.func public @test_abs(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.abs %arg0 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/unary/cbrt_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/unary/cbrt_op.mlir index 49eef5df7d..e754202468 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/unary/cbrt_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/unary/cbrt_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_rsqrt attributes {} { func.func public @test_cbrt(%arg0: tensor<4xf64>) -> tensor<4xf64> { %0 = stablehlo.cbrt %arg0 : tensor<4xf64> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/unary/exponential_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/unary/exponential_op.mlir index 6b93c664c7..975652f49b 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/unary/exponential_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/unary/exponential_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_exp attributes {} { func.func public @test_exp(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.exponential %arg0 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/unary/negate_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/unary/negate_op.mlir index 
65f491c038..68cfed1e9e 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/unary/negate_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/unary/negate_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_neg attributes {} { func.func public @test_neg(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.negate %arg0 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/unary/sqrt_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/unary/sqrt_op.mlir index 72d2449b05..99023e472b 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/unary/sqrt_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/unary/sqrt_op.mlir @@ -1,6 +1,5 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s -#any_device = #tt.operand_constraint module @jit_eltwise_sqrt attributes {} { func.func public @test_sqrt(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = stablehlo.sqrt %arg0 : tensor<13x21x3xf32> diff --git a/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir b/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir index 6f72e56f17..6e5ef160c3 100644 --- a/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir +++ b/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir @@ -1,5 +1,4 @@ // RUN: ttmlir-opt --ttir-to-ttir-decomposition %s | FileCheck %s -#any_device = #tt.operand_constraint module attributes {} { func.func @forward(%arg0: tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32> { // CHECK: %[[C:.*]] = "ttir.arange"[[C:.*]] diff --git a/test/ttmlir/Dialect/TTIR/convolution/convolution_tests_negative.mlir b/test/ttmlir/Dialect/TTIR/convolution/convolution_tests_negative.mlir index 278bb9f211..6f960f0dda 100644 --- a/test/ttmlir/Dialect/TTIR/convolution/convolution_tests_negative.mlir +++ b/test/ttmlir/Dialect/TTIR/convolution/convolution_tests_negative.mlir @@ -28,7 +28,6 @@ module @jit_convolution_bad_spatial_dimensions { } // ----- -#any_device_tile = #tt.operand_constraint module @jit_convolution_bad_stride_dimensions { func.func public @test_illegal_convolution(%arg0: tensor<1x3x100x100xbf16>, %arg1: tensor<7x3x3x3xbf16>) -> tensor<1x7x100x100xbf16> { %0 = tensor.empty() : tensor<1x7x100x100xbf16> @@ -58,7 +57,6 @@ module @jit_convolution_bad_stride_dimensions { } // ----- -#any_device_tile = #tt.operand_constraint module @jit_convolution_bad_input_tensor { func.func public @test_illegal_convolution(%arg0: tensor<1x3x100x100x100xbf16>, %arg1: tensor<7x3x3x3xbf16>) -> tensor<1x7x100x100xbf16> { %0 = tensor.empty() : tensor<1x7x100x100xbf16> @@ -88,7 +86,6 @@ module @jit_convolution_bad_input_tensor { } // ----- -#any_device_tile = #tt.operand_constraint module @jit_convolution_bad_weight_tensor { func.func public @test_illegal_convolution(%arg0: tensor<1x3x100x100xbf16>, %arg1: tensor<20x7x3x3x3xbf16>) -> tensor<1x7x100x100xbf16> { %0 = tensor.empty() : tensor<1x7x100x100xbf16> @@ -118,7 +115,6 @@ module @jit_convolution_bad_weight_tensor { } // ----- -#any_device_tile = #tt.operand_constraint module @jit_convolution_bad_bias_tensor { func.func public @test_illegal_convolution(%arg0: tensor<1x3x100x100xbf16>, %arg1: tensor<7x3x3x3xbf16>, %arg2: tensor<1x1x7xbf16>) -> tensor<1x7x100x100xbf16> { %0 = tensor.empty() : tensor<1x7x100x100xbf16> diff --git a/test/ttmlir/Dialect/TTIR/index/index_tests_negative.mlir 
b/test/ttmlir/Dialect/TTIR/index/index_tests_negative.mlir index 9f5d8b04ae..6b1d5faee3 100644 --- a/test/ttmlir/Dialect/TTIR/index/index_tests_negative.mlir +++ b/test/ttmlir/Dialect/TTIR/index/index_tests_negative.mlir @@ -13,7 +13,6 @@ module attributes {} { // Verify that the parsing fails if the dim is not in the rank range of the input tensor // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_begins(%arg0: tensor<3x128x64xbf16>) -> tensor<3x128x64xbf16> { %0 = tensor.empty() : tensor<3x128x64xbf16> @@ -25,7 +24,6 @@ module attributes {} { // Verify that the parsing fails if the output type is not equal to the input tensor type // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_output_datatype(%arg0: tensor<3x128x64xbf16>) -> tensor<3x128x32xf32> { %0 = tensor.empty() : tensor<3x128x32xf32> @@ -37,7 +35,6 @@ module attributes {} { // Verify that the parsing fails if the output rank is not equal to the input tensor rank // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_input_output_rank_missmatch(%arg0: tensor<3x128x64xbf16>) -> tensor<3x64x64x1xbf16> { %0 = tensor.empty() : tensor<3x64x64x1xbf16> @@ -49,7 +46,6 @@ module attributes {} { // Verify that the parsing fails if the begin value exceeds positive limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_begin_positive(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x1x128x64xbf16> { %0 = tensor.empty() : tensor<10x1x128x64xbf16> @@ -61,7 +57,6 @@ module attributes {} { // Verify that the parsing fails if the begin value exceeds negative limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_begin_negative(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x64x64xbf16> { %0 = tensor.empty() : tensor<10x3x64x64xbf16> @@ -73,7 +68,6 @@ module attributes {} { // Verify that the parsing fails if the end value exceeds positive limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_end_positive(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x128x64xbf16> { %0 = tensor.empty() : tensor<10x3x128x64xbf16> @@ -85,7 +79,6 @@ module attributes {} { // Verify that the parsing fails if the end value exceeds positive limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_end_negative(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x128x64xbf16> { %0 = tensor.empty() : tensor<10x3x128x64xbf16> @@ -97,7 +90,6 @@ module attributes {} { // Verify that the parsing fails if the step value is equal to zero // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_step_is_zero(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x128x64xbf16> { %0 = tensor.empty() : tensor<10x3x128x64xbf16> @@ -109,7 +101,6 @@ module attributes {} { // Verify that the parsing fails if the begin index is greater than end and step is positive // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_begin_greater_than_end_positive_step(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x128x64xbf16> { %0 = tensor.empty() : tensor<10x3x128x64xbf16> @@ -121,7 +112,6 @@ module attributes {} { // Verify that the parsing fails if the end index is greater than begin and step is negative // ----- 
-#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_begin_less_than_end_negative_step(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x128x64xbf16> { %0 = tensor.empty() : tensor<10x3x128x64xbf16> @@ -133,7 +123,6 @@ module attributes {} { // Verify that the parsing fails if there is missmatch in output dimension // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @index_negative_invalid_output_shape(%arg0: tensor<10x3x128x64xbf16>) -> tensor<10x3x128x32xbf16> { %0 = tensor.empty() : tensor<10x3x128x32xbf16> diff --git a/test/ttmlir/Dialect/TTIR/matmul/matmul_tests_negative.mlir b/test/ttmlir/Dialect/TTIR/matmul/matmul_tests_negative.mlir index f15379c8f7..d6c20b0ac5 100644 --- a/test/ttmlir/Dialect/TTIR/matmul/matmul_tests_negative.mlir +++ b/test/ttmlir/Dialect/TTIR/matmul/matmul_tests_negative.mlir @@ -12,7 +12,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_1d_1d_inner_dimension_missmatch(%arg0: tensor<128xbf16>, %arg1: tensor) -> tensor<1xbf16> { // CHECK: error: 'ttir.matmul' op Input B must be at least a 1D tensor @@ -24,7 +23,6 @@ module attributes {} { // Verify that the parsing fails if the output is a scalar // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_1d_1d_inner_dimension_missmatch(%arg0: tensor<128xbf16>, %arg1: tensor<128xbf16>) -> tensor { // CHECK: error: 'ttir.matmul' op Scalar output is not supported, output must be at least a 1D tensor @@ -35,7 +33,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_1d_1d_inner_dimension_missmatch(%arg0: tensor<128xbf16>, %arg1: tensor<128xbf16>) -> tensor<2xbf16> { // CHECK: error: 'ttir.matmul' op Scalar output must be a 1D tensor of size 1 @@ -47,7 +44,6 @@ module attributes {} { // Inner dimension mismatch tests // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_1d_1d_inner_dimension_missmatch(%arg0: tensor<128xbf16>, %arg1: tensor<64xbf16>) -> tensor<1xbf16> { // CHECK: error: 'ttir.matmul' op Input A[-1](128) and B[-2](64) must have matching inner dimensions @@ -58,7 +54,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_1d_2d_inner_dimension_missmatch(%arg0: tensor<64xbf16>, %arg1: tensor<128x64xbf16>) -> tensor<64xbf16> { // CHECK: error: 'ttir.matmul' op Input A[-1](64) and B[-2](128) must have matching inner dimensions @@ -69,7 +64,6 @@ func.func @matmul_negative_1d_2d_inner_dimension_missmatch(%arg0: tensor<64xbf16 } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_2d_1d_inner_dimension_missmatch(%arg0: tensor<64x128xbf16>, %arg1: tensor<64xbf16>) -> tensor<64xbf16> { // CHECK: error: 'ttir.matmul' op Input A[-1](128) and B[-2](64) must have matching inner dimensions @@ -80,7 +74,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_2d_2d_inner_dimension_missmatch(%arg0: tensor<64x128xbf16>, %arg1: tensor<64x128xbf16>) -> tensor<64x64xbf16> { // CHECK: error: 'ttir.matmul' op Input A[-1](128) and B[-2](64) must have matching inner dimensions @@ -91,7 +84,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func 
@matmul_negative_nd_nd_inner_dimension_missmatch(%arg0: tensor<7x64x128xbf16>, %arg1: tensor<1x64x128xbf16>) -> tensor<7x64x64xbf16> { // CHECK: error: 'ttir.matmul' op Input A[-1](128) and B[-2](64) must have matching inner dimensions @@ -103,7 +95,6 @@ module attributes {} { // Batch dimension mismatch tests // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_nd_nd_same_rank_batch_broadcast_incompatible_1(%arg0: tensor<7x64x128xbf16>, %arg1: tensor<2x128x64xbf16>) -> tensor<7x64x64xbf16> { // CHECK: error: 'ttir.matmul' op Batch dimensions of input A(7) and B(2) are not broadcast compatible @@ -114,7 +105,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_nd_nd_same_rank_batch_broadcast_incompatible_2(%arg0: tensor<2x7x64x128xbf16>, %arg1: tensor<7x1x128x64xbf16>) -> tensor<7x7x64x64xbf16> { // CHECK: error: 'ttir.matmul' op Batch dimensions of input A(2,7) and B(7,1) are not broadcast compatible @@ -125,7 +115,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_nd_nd_different_rank_batch_broadcast_incompatible(%arg0: tensor<12x2x7x64x128xbf16>, %arg1: tensor<7x1x128x64xbf16>) -> tensor<12x7x7x64x64xbf16> { // CHECK: error: 'ttir.matmul' op Batch dimensions of input A(12,2,7) and B(7,1) are not broadcast compatible @@ -137,7 +126,6 @@ module attributes {} { // Output shape mismatch tests // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_2d_2d_inner_dimension_missmatch(%arg0: tensor<64x128xbf16>, %arg1: tensor<128x64xbf16>) -> tensor<64xbf16> { // CHECK: error: 'ttir.matmul' op Output shape rank(1) must match the expected output shape rank(2) @@ -148,7 +136,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @matmul_negative_2d_2d_inner_dimension_missmatch(%arg0: tensor<64x128xbf16>, %arg1: tensor<128x64xbf16>) -> tensor<64x128xbf16> { // CHECK: error: 'ttir.matmul' op Output shape dimension[1](128) doesn't match the expected output shape dimension[1](64) diff --git a/test/ttmlir/Dialect/TTIR/slice/slice_tests_negative.mlir b/test/ttmlir/Dialect/TTIR/slice/slice_tests_negative.mlir index db444258ec..5e7eaf43c6 100644 --- a/test/ttmlir/Dialect/TTIR/slice/slice_tests_negative.mlir +++ b/test/ttmlir/Dialect/TTIR/slice/slice_tests_negative.mlir @@ -13,7 +13,6 @@ module attributes {} { // Verify that the parsing fails if the begins size is not equal to the input tensor rank // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_begins(%arg0: tensor<3x128x64xbf16>) -> tensor<1x64x64xbf16> { %0 = tensor.empty() : tensor<1x64x64xbf16> @@ -25,7 +24,6 @@ module attributes {} { // Verify that the parsing fails if the ends size is not equal to the input tensor rank // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_ends(%arg0: tensor<3x128x64xbf16>) -> tensor<1x64x64xbf16> { %0 = tensor.empty() : tensor<1x64x64xbf16> @@ -37,7 +35,6 @@ module attributes {} { // Verify that the parsing fails if the step size is not equal to the input tensor rank // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_step(%arg0: tensor<3x128x64xbf16>) -> tensor<1x64x64xbf16> { %0 = tensor.empty() : tensor<1x64x64xbf16> @@ -49,7 +46,6 @@ 
module attributes {} { // Verify that the parsing fails if the output type is not equal to the input tensor type // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_output_datatype(%arg0: tensor<3x128x64xbf16>) -> tensor<1x64x64xf32> { %0 = tensor.empty() : tensor<1x64x64xf32> @@ -61,7 +57,6 @@ module attributes {} { // Verify that the parsing fails if the output rank is not equal to the input tensor rank // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_input_output_rank_missmatch(%arg0: tensor<3x128x64xbf16>) -> tensor<1x1x64x64xbf16> { %0 = tensor.empty() : tensor<1x1x64x64xbf16> @@ -73,7 +68,6 @@ module attributes {} { // Verify that the parsing fails if the begin value exceeds positive limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_begin_positive(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -85,7 +79,6 @@ module attributes {} { // Verify that the parsing fails if the begin value exceeds negative limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_begin_negative(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -97,7 +90,6 @@ module attributes {} { // Verify that the parsing fails if the end value exceeds positive limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_end_positive(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -109,7 +101,6 @@ module attributes {} { // Verify that the parsing fails if the end value exceeds negative limit // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_end_negative(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -121,7 +112,6 @@ module attributes {} { // Verify that the parsing fails if the step value is equal to zero // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_step_is_zero(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -133,7 +123,6 @@ module attributes {} { // Verify that the parsing fails if the begin index is greater than end and step is positive // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_begin_greater_than_end_positive_step(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -144,7 +133,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_begin_greater_than_end_positive_step(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x8x8xbf16> { %0 = tensor.empty() : tensor<4x1x8x8xbf16> @@ -156,7 +144,6 @@ module attributes {} { // Verify that the parsing fails if the end index is greater than begin and step is negative // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_begin_less_than_end_negative_step(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x8xbf16> { %0 = tensor.empty() : tensor<4x1x16x8xbf16> @@ -167,7 +154,6 @@ module attributes {} { } // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { 
func.func @slice_negative_begin_less_than_end_negative_step(%arg0: tensor<10x3x128x64xbf16>) -> tensor<5x1x16x8xbf16> { %0 = tensor.empty() : tensor<5x1x16x8xbf16> @@ -179,7 +165,6 @@ module attributes {} { // Verify that the parsing fails if there is missmatch in output dimension // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @slice_negative_invalid_output_shape(%arg0: tensor<10x3x128x64xbf16>) -> tensor<4x1x16x16xbf16> { %0 = tensor.empty() : tensor<4x1x16x16xbf16> diff --git a/test/ttmlir/Dialect/TTNN/Transforms/ttnn_modify_signatures_for_dylib_0.mlir b/test/ttmlir/Dialect/TTNN/Transforms/ttnn_modify_signatures_for_dylib_0.mlir new file mode 100644 index 0000000000..f7cab85905 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/Transforms/ttnn_modify_signatures_for_dylib_0.mlir @@ -0,0 +1,12 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" --ttnn-modify-signatures-for-dylib %s | FileCheck %s + +module attributes {} { + // CHECK: func.func @add(%arg0: tuple<[[TENSOR_A:.*>]], [[TENSOR_B:.*>]]>) -> tensor<32x32xbf16, #ttnn_layout> { + func.func @add(%arg0: tensor<32x32xbf16>, %arg1: tensor<32x32xbf16>) -> tensor<32x32xbf16> { + // CHECK-NEXT: %0 = tt.get_tuple_element %arg0[0] : (tuple<[[TENSOR_A]], [[TENSOR_B]]>) -> [[TENSOR_A]] + // CHECK-NEXT: %1 = tt.get_tuple_element %arg0[1] : (tuple<[[TENSOR_A]], [[TENSOR_B]]>) -> [[TENSOR_B]] + %0 = tensor.empty() : tensor<32x32xbf16> + %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array}> : (tensor<32x32xbf16>, tensor<32x32xbf16>, tensor<32x32xbf16>) -> tensor<32x32xbf16> + return %1 : tensor<32x32xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/ccl/all_reduce.mlir b/test/ttmlir/Dialect/TTNN/ccl/all_reduce.mlir index 1eaf04df7f..5fa77776be 100644 --- a/test/ttmlir/Dialect/TTNN/ccl/all_reduce.mlir +++ b/test/ttmlir/Dialect/TTNN/ccl/all_reduce.mlir @@ -17,7 +17,6 @@ module attributes {} { // ----- // Verify lowering of ttir all_reduce to ttnn ops -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @all_reduce(%arg0: tensor<1x1x4096x16384xf32>) -> tensor<1x1x4096x16384xf32> { %0 = tensor.empty() : tensor<1x1x4096x16384xf32> diff --git a/test/ttmlir/Dialect/TTNN/clamp/clamp_tests_negative.mlir b/test/ttmlir/Dialect/TTNN/clamp/clamp_tests_negative.mlir index f97dde2ba1..67eb3d2116 100644 --- a/test/ttmlir/Dialect/TTNN/clamp/clamp_tests_negative.mlir +++ b/test/ttmlir/Dialect/TTNN/clamp/clamp_tests_negative.mlir @@ -2,7 +2,6 @@ // Negative tests for matmul operation // Verify that the parsing fails if input and output shapes do not match. -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @clamp(%arg0: tensor<64x64xbf16>) -> tensor<64x128xbf16> { // CHECK: error: 'ttnn.clamp' op input and output must have same shape. @@ -14,7 +13,6 @@ module attributes {} { // Verify that parsing fails in case of more than one input. // ----- -#any_device_tile = #tt.operand_constraint module attributes {} { func.func @clamp2(%arg0: tensor<64x128xbf16>, %arg1: tensor<64x128xbf16>) -> tensor<64x128xbf16> { // CHECK: error: 'ttnn.clamp' op expects one tensor as input. 
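Stepping back to the new TTNNModifySignaturesForDylib pass and its test above (ttnn_modify_signatures_for_dylib_0.mlir): the rewrite is easiest to picture in ordinary code. The sketch below is a conceptual Python analogy only, with hypothetical names and no call into the actual pass; it shows how an n-argument forward function becomes a single-tuple function whose entry block unpacks the elements, which is what the tt.get_tuple_element ops do in the MLIR.

# Conceptual analogy of the signature rewrite (hypothetical names, not the real API).

# Before: every tensor is a separate entry-block argument.
def forward_before(a, b):
    return a + b

# After: the pass packs all tensor arguments into one tuple argument and
# prepends one "get_tuple_element" per original argument; every old use is
# redirected to the unpacked value, then the old arguments are erased.
def forward_after(inputs: tuple):
    a = inputs[0]   # tt.get_tuple_element %arg0[0]
    b = inputs[1]   # tt.get_tuple_element %arg0[1]
    return a + b

Note that only functions with no uses elsewhere in the module are rewritten, mirroring the funcOp->getUses().empty() check: utility functions called from other functions keep their original signatures.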
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_buffer_type.mlir b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_buffer_type.mlir new file mode 100644 index 0000000000..bdce3fd0f9 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_buffer_type.mlir @@ -0,0 +1,13 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=BFInterleaved" %s | FileCheck %s +// XFAIL: * +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<6144x6144xbf16>, %arg1: tensor<6144x6144xbf16>, %arg2: tensor<6144x6144xbf16>) -> tensor<6144x6144xbf16> { + // CHECK: #[[L1_:.*]] = #ttnn.buffer_type + %0 = tensor.empty() : tensor<6144x6144xbf16> + %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<6144x6144xbf16>, tensor<6144x6144xbf16>, tensor<6144x6144xbf16>) -> tensor<6144x6144xbf16> + %2 = tensor.empty() : tensor<6144x6144xbf16> + %3 = "ttir.add"(%1, %arg2, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<6144x6144xbf16>, tensor<6144x6144xbf16>, tensor<6144x6144xbf16>) -> tensor<6144x6144xbf16> + return %3 : tensor<6144x6144xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_operands_l1_op.mlir b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_operands_l1_op.mlir new file mode 100644 index 0000000000..ec809a60a7 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_dram_operands_l1_op.mlir @@ -0,0 +1,19 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=BFInterleaved" %s | FileCheck %s +module attributes {} { + func.func @forward(%arg0: tensor<5120x8192xbf16>, %arg1: tensor<8192x5120xbf16>) -> tensor<5120x5120xbf16> { + // CHECK: #[[L1_:.*]] = #ttnn.buffer_type + // CHECK-DAG: #[[LAYOUT_5:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<32x20x!tt.tile<32x32, bf16>, #dram>, > + // CHECK-DAG: #[[LAYOUT_6:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<20x32x!tt.tile<32x32, bf16>, #dram>, > + // CHECK-DAG: #[[LAYOUT_7:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<20x20x!tt.tile<32x32, bf16>, #l1_>, > + %0 = tensor.empty() : tensor<5120x8192xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<5120x8192xbf16, #[[LAYOUT_6]]> + %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array}> : (tensor<5120x8192xbf16>, tensor<5120x8192xbf16>) -> tensor<5120x8192xbf16> + %2 = tensor.empty() : tensor<8192x5120xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<8192x5120xbf16, #[[LAYOUT_5]]> + %3 = "ttir.relu"(%arg1, %2) <{operandSegmentSizes = array}> : (tensor<8192x5120xbf16>, tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16> + %4 = tensor.empty() : tensor<5120x5120xbf16> + // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<5120x5120xbf16, #[[LAYOUT_7]]> + %5 = "ttir.matmul"(%1, %3, %4) : (tensor<5120x8192xbf16>, tensor<8192x5120xbf16>, tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16> + return %5 : tensor<5120x5120xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_l1_operands_dram_op.mlir 
b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_l1_operands_dram_op.mlir new file mode 100644 index 0000000000..0460f6ac47 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/all_l1_operands_dram_op.mlir @@ -0,0 +1,19 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=BFInterleaved" %s | FileCheck %s +module attributes {} { + func.func @forward(%arg0: tensor<6144x1024xbf16>, %arg1: tensor<1024x6144xbf16>) -> tensor<6144x6144xbf16> { + // CHECK: #[[L1_:.*]] = #ttnn.buffer_type + // CHECK-DAG: #[[LAYOUT_5:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<24x4x!tt.tile<32x32, bf16>, #l1_>, > + // CHECK-DAG: #[[LAYOUT_6:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<4x24x!tt.tile<32x32, bf16>, #l1_>, > + // CHECK-DAG: #[[LAYOUT_7:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<24x24x!tt.tile<32x32, bf16>, #dram>, > + %0 = tensor.empty() : tensor<6144x1024xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<6144x1024xbf16, #[[LAYOUT_5]]> + %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array}> : (tensor<6144x1024xbf16>, tensor<6144x1024xbf16>) -> tensor<6144x1024xbf16> + %2 = tensor.empty() : tensor<1024x6144xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<1024x6144xbf16, #[[LAYOUT_6]]> + %3 = "ttir.relu"(%arg1, %2) <{operandSegmentSizes = array}> : (tensor<1024x6144xbf16>, tensor<1024x6144xbf16>) -> tensor<1024x6144xbf16> + %4 = tensor.empty() : tensor<6144x6144xbf16> + // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<6144x6144xbf16, #[[LAYOUT_7]]> + %5 = "ttir.matmul"(%1, %3, %4) : (tensor<6144x1024xbf16>, tensor<1024x6144xbf16>, tensor<6144x6144xbf16>) -> tensor<6144x6144xbf16> + return %5 : tensor<6144x6144xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_01.mlir b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_01.mlir new file mode 100644 index 0000000000..5446082c75 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_01.mlir @@ -0,0 +1,35 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=BFInterleaved" %s | FileCheck %s +// +// A +// | +// B +// / \ +// C D +// \ / +// E +// | +// F +// +// There is enough L1 memory to schedule this fork-join but only if we allocate +// the output tensor of the op B once. 
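The comment above makes a quantitative claim about L1 capacity; the short Python sketch below redoes the arithmetic. The 8x8 grid, 2-byte bf16 elements, and the roughly 1.5 MB usable-L1-per-core figure are illustrative assumptions (the real budget comes from the system descriptor and usableL1CacheSize), but they show why the schedule fits only when B's output is allocated once.

# Rough per-core L1 arithmetic for the fork-join above (illustrative assumptions:
# 2-byte bf16 elements, tensors interleaved across an 8x8 grid, ~1.5 MB usable L1
# per core; the real figures come from the system descriptor).
CORES = 8 * 8
USABLE_L1_PER_CORE = 1.5 * 1024 * 1024  # bytes, assumed budget

def per_core_bytes(shape, elem_bytes=2, cores=CORES):
    """Bytes one core holds for a tensor interleaved across all cores."""
    n = 1
    for d in shape:
        n *= d
    return n * elem_bytes / cores

b_out = per_core_bytes((4096, 5120))  # relu B output   -> 640 KiB per core
c_out = per_core_bytes((4096, 1024))  # matmul C output -> 128 KiB per core
d_out = per_core_bytes((4096, 1024))  # matmul D output -> 128 KiB per core

# B feeds both matmuls, so its output stays live while C and D execute.
# Counted once, the peak is ~0.9 MB per core and fits the assumed budget;
# counted once per consumer it reaches ~1.5 MB per core and no longer does.
assert b_out + c_out + d_out < USABLE_L1_PER_CORE
assert 2 * b_out + c_out + d_out >= USABLE_L1_PER_CORE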
+// +module attributes {} { + func.func @forward(%arg0: tensor<4096x5120xbf16>, %arg1: tensor<5120x1024xbf16>, %arg2: tensor<5120x1024xbf16>) -> tensor<4096x1024xbf16> { + // CHECK: #[[L1_:.*]] = #ttnn.buffer_type + // CHECK: #[[LAYOUT_5:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<16x20x!tt.tile<32x32, bf16>, #l1_>, > + // CHECK: #[[LAYOUT_6:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<16x4x!tt.tile<32x32, bf16>, #l1_>, > + %0 = tensor.empty() : tensor<4096x5120xbf16> + // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<4096x5120xbf16, #[[LAYOUT_5]]> + %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array}> : (tensor<4096x5120xbf16>, tensor<4096x5120xbf16>) -> tensor<4096x5120xbf16> + %2 = tensor.empty() : tensor<4096x1024xbf16> + // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<4096x1024xbf16, #[[LAYOUT_6]]> + %3 = "ttir.matmul"(%1, %arg1, %2) : (tensor<4096x5120xbf16>, tensor<5120x1024xbf16>, tensor<4096x1024xbf16>) -> tensor<4096x1024xbf16> + %4 = tensor.empty() : tensor<4096x1024xbf16> + // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<4096x1024xbf16, #[[LAYOUT_6]]> + %5 = "ttir.matmul"(%1, %arg2, %4) : (tensor<4096x5120xbf16>, tensor<5120x1024xbf16>, tensor<4096x1024xbf16>) -> tensor<4096x1024xbf16> + %6 = tensor.empty() : tensor<4096x1024xbf16> + // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<4096x1024xbf16, #[[LAYOUT_6]]> + %7 = "ttir.add"(%3, %5, %6) <{operandSegmentSizes = array}> : (tensor<4096x1024xbf16>, tensor<4096x1024xbf16>, tensor<4096x1024xbf16>) -> tensor<4096x1024xbf16> + return %7 : tensor<4096x1024xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_02.mlir b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_02.mlir new file mode 100644 index 0000000000..ee44b78c21 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/optimizer/bf_interleaved_policy/fork_join_02.mlir @@ -0,0 +1,42 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=BFInterleaved" %s | FileCheck %s +// +// A +// | +// B +// / \ +// C D +// | | +// E | +// \ / +// F +// | +// G +// +// There is not enough L1 memory to schedule this fork-join even if we allocate +// the output tensor of the op B once because the output tensor of the op C is +// too large to fit in L1 on its own.
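For this second fork-join the blocker is the 4096x9216 intermediate on the C branch; reusing per_core_bytes from the sketch above (same illustrative assumptions):

c_branch = per_core_bytes((4096, 9216))  # 1,179,648 bytes, i.e. 1,152 KiB per core

Adding B's 640 KiB puts the pair well past the assumed per-core budget, which is why the CHECK lines below expect that tensor's layout (a memref of 16x36 tiles) in #dram while the smaller tensors keep L1 layouts.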
+// +module attributes {} { + func.func @forward(%arg0: tensor<4096x5120xbf16>, %arg1: tensor<5120x9216xbf16>, %arg2: tensor<9216x1024xbf16>, %arg3: tensor<5120x1024xbf16>) -> tensor<4096x1024xbf16> { + // CHECK: #[[L1_:.*]] = #ttnn.buffer_type + // CHECK: #[[LAYOUT_9:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<16x20x!tt.tile<32x32, bf16>, #l1_>, > + // CHECK: #[[LAYOUT_10:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<16x36x!tt.tile<32x32, bf16>, #dram>, > + // CHECK: #[[LAYOUT_11:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8, (d0, d1) -> (0, d0, d1)>, memref<16x4x!tt.tile<32x32, bf16>, #l1_>, > + %0 = tensor.empty() : tensor<4096x5120xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<4096x5120xbf16, #[[LAYOUT_9]]> + %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array}> : (tensor<4096x5120xbf16>, tensor<4096x5120xbf16>) -> tensor<4096x5120xbf16> + %2 = tensor.empty() : tensor<4096x9216xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<4096x9216xbf16, #[[LAYOUT_10]]> + %3 = "ttir.matmul"(%1, %arg1, %2) : (tensor<4096x5120xbf16>, tensor<5120x9216xbf16>, tensor<4096x9216xbf16>) -> tensor<4096x9216xbf16> + %4 = tensor.empty() : tensor<4096x1024xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<4096x1024xbf16, #[[LAYOUT_11]]> + %5 = "ttir.matmul"(%3, %arg2, %4) : (tensor<4096x9216xbf16>, tensor<9216x1024xbf16>, tensor<4096x1024xbf16>) -> tensor<4096x1024xbf16> + %6 = tensor.empty() : tensor<4096x1024xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<4096x1024xbf16, #[[LAYOUT_11]]> + %7 = "ttir.matmul"(%1, %arg3, %6) : (tensor<4096x5120xbf16>, tensor<5120x1024xbf16>, tensor<4096x1024xbf16>) -> tensor<4096x1024xbf16> + %8 = tensor.empty() : tensor<4096x1024xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<4096x1024xbf16, #[[LAYOUT_11]]> + %9 = "ttir.add"(%5, %7, %8) <{operandSegmentSizes = array}> : (tensor<4096x1024xbf16>, tensor<4096x1024xbf16>, tensor<4096x1024xbf16>) -> tensor<4096x1024xbf16> + return %9 : tensor<4096x1024xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/all_l1_interleaved_policy.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/all_l1_interleaved_policy.mlir similarity index 97% rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/all_l1_interleaved_policy.mlir rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/all_l1_interleaved_policy.mlir index a895ca25ef..eaa16fb268 100644 --- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/all_l1_interleaved_policy.mlir +++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/all_l1_interleaved_policy.mlir @@ -1,4 +1,4 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s module attributes {} { func.func @forward(%arg0: tensor<64x128xbf16>, %arg1: tensor<128x96xbf16>, %arg2: tensor<64x96xbf16>, %arg3: tensor<96x32xbf16>, %arg4: tensor<64x32xbf16>) -> tensor<64x32xbf16> { // CHECK: #[[L1_:.*]] = #ttnn.buffer_type diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/fork_join.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/fork_join.mlir 
similarity index 97%
rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/fork_join.mlir
rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/fork_join.mlir
index d9336db8a8..657da93390 100644
--- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/fork_join.mlir
+++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/fork_join.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s
 //
 // A
 // |
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/mnist_l1_interleaved.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/mnist_l1_interleaved.mlir
similarity index 98%
rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/mnist_l1_interleaved.mlir
rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/mnist_l1_interleaved.mlir
index 3d437a74fa..d1a32308ff 100644
--- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/mnist_l1_interleaved.mlir
+++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/mnist_l1_interleaved.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s
 #loc = loc("MNISTLinear":4294967295:0)
 module @"tt-forge-graph" attributes {} {
   func.func @main(%arg0: tensor<1x784xf32> loc("MNISTLinear":4294967295:0), %arg1: tensor<1x10xf32> loc("MNISTLinear":4294967295:0), %arg2: tensor<256x10xf32> loc("MNISTLinear":4294967295:0), %arg3: tensor<1x256xf32> loc("MNISTLinear":4294967295:0), %arg4: tensor<784x256xf32> loc("MNISTLinear":4294967295:0)) -> tensor<1x10xf32> {
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir
similarity index 96%
rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir
rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir
index ecd90f1abb..436518ce0f 100644
--- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir
+++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s
 //
 // A B
 // \ /
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir
similarity index 97%
rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir
rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir
index 056ded8d35..8f018f9515 100644
--- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir
+++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s
 //
 // A B
 // \ /
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir
similarity index 97%
rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir
rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir
index caaf3254d8..0791c46295 100644
--- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir
+++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s
 //
 // A B
 // \ /
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir
similarity index 97%
rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir
rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir
index 63cd3bcaa2..049f8f0b45 100644
--- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir
+++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s
 //
 // A B
 // \ /
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir
similarity index 97%
rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir
rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir
index 9f12e8b6f6..0a63866a63 100644
--- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir
+++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s
 //
 // A B
 // \ /
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir
similarity index 97%
rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir
rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir
index c594ca4182..c75c2f39c7 100644
--- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir
+++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s
 //
 // A B
 // \ /
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir
similarity index 97%
rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir
rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir
index eb2a51b174..635540ea61 100644
--- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir
+++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s
 //
 // A B
 // \ /
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir
similarity index 96%
rename from test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir
rename to test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir
index 883842694f..1d591ef2b2 100644
--- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir
+++ b/test/ttmlir/Dialect/TTNN/optimizer/greedy_l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s | FileCheck %s
 //
 // A B
 // \ /
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/single_op.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/single_op.mlir
deleted file mode 100644
index 7b8aa07593..0000000000
--- a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/single_op.mlir
+++ /dev/null
@@ -1,9 +0,0 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
-// UNSUPPORTED: true
-module attributes {} {
-  func.func @forward(%arg0: tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16> {
-    %0 = tensor.empty() : tensor<5120x5120xbf16>
-    %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array}> : (tensor<5120x5120xbf16>, tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16>
-    return %1 : tensor<5120x5120xbf16>
-  }
-}
diff --git a/test/ttmlir/Silicon/TTNN/ones.mlir b/test/ttmlir/Silicon/TTNN/ones.mlir
index 249530700b..660de36ae1 100644
--- a/test/ttmlir/Silicon/TTNN/ones.mlir
+++ b/test/ttmlir/Silicon/TTNN/ones.mlir
@@ -2,7 +2,6 @@
 // RUN: FileCheck %s --input-file=%t.mlir
 // RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
-#any_device = #tt.operand_constraint
 module {
   func.func @ones_2d() -> tensor<32x128xbf16> {
     // CHECK: {{.*}} = "ttnn.ones"() {{.*}}
diff --git a/test/ttmlir/Silicon/TTNN/optimizer/simple_fork_join.mlir b/test/ttmlir/Silicon/TTNN/optimizer/simple_fork_join.mlir
index e323e10249..b6b3c4d686 100644
--- a/test/ttmlir/Silicon/TTNN/optimizer/simple_fork_join.mlir
+++ b/test/ttmlir/Silicon/TTNN/optimizer/simple_fork_join.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=GreedyL1Interleaved" %s > %t.mlir
 // RUN: FileCheck %s --input-file=%t.mlir
 // RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
 // UNSUPPORTED: true
diff --git a/test/unittests/Optimizer/CMakeLists.txt b/test/unittests/Optimizer/CMakeLists.txt
index b05c8ae294..cf11c479fd 100644
--- a/test/unittests/Optimizer/CMakeLists.txt
+++ b/test/unittests/Optimizer/CMakeLists.txt
@@ -1,7 +1,7 @@
 add_mlir_unittest(OptimizerTests
   TestShardSolver.cpp
   TestOptimizerOverrides.cpp
-  TestL1InterleavedPolicy.cpp
+  TestGreedyL1InterleavedPolicy.cpp
 )
 
 target_link_libraries(OptimizerTests
diff --git a/test/unittests/Optimizer/TestL1InterleavedPolicy.cpp b/test/unittests/Optimizer/TestGreedyL1InterleavedPolicy.cpp
similarity index 93%
rename from test/unittests/Optimizer/TestL1InterleavedPolicy.cpp
rename to test/unittests/Optimizer/TestGreedyL1InterleavedPolicy.cpp
index b09b65245d..3bc0c54410 100644
--- a/test/unittests/Optimizer/TestL1InterleavedPolicy.cpp
+++ b/test/unittests/Optimizer/TestGreedyL1InterleavedPolicy.cpp
@@ -16,14 +16,14 @@
 #include "ttmlir/Dialect/TTNN/IR/TTNN.h"
 #include "ttmlir/Dialect/TTNN/IR/TTNNOps.h"
-#include "ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h"
+#include "ttmlir/Dialect/TTNN/Analysis/GreedyL1InterleavedPolicy.h"
 
 using namespace mlir::tt::ttnn;
 
 constexpr int TensorDimX = 128;
 constexpr int TensorDimY = 128;
 
-class L1InterleavedPolicyBase : public ::testing::Test {
+class GreedyL1InterleavedPolicyBase : public ::testing::Test {
 public:
   mlir::MLIRContext context;
   mlir::OwningOpRef module;
@@ -31,9 +31,9 @@ class L1InterleavedPolicyBase : public ::testing::Test {
   mlir::func::FuncOp func;
   mlir::tt::DeviceAttr deviceAttr;
 
-  using OpMemSpec = L1InterleavedPolicy::OpMemSpec;
-  using OpConfig = L1InterleavedPolicy::OpConfig;
-  using L1Usage = L1InterleavedPolicy::L1Usage;
+  using OpMemSpec = GreedyL1InterleavedPolicy::OpMemSpec;
+  using OpConfig = GreedyL1InterleavedPolicy::OpConfig;
+  using L1Usage = GreedyL1InterleavedPolicy::L1Usage;
 
   void SetUp() override {
     context.loadDialect();
@@ -121,7 +121,7 @@ class L1InterleavedPolicyBase : public ::testing::Test {
   void TearDown() override {}
 };
 
-TEST_F(L1InterleavedPolicyBase, VerifyGreedyPolicy) {
+TEST_F(GreedyL1InterleavedPolicyBase, VerifyGreedyPolicy) {
   std::vector l1ChainConfigs;
   llvm::DenseMap> legalLayouts;
   llvm::DenseMap>
@@ -174,8 +174,8 @@ TEST_F(L1InterleavedPolicyBase, VerifyGreedyPolicy) {
                            legalLayouts, opsL1Usage);
 
   // Run greedy config picker policy
-  L1InterleavedPolicy l1InterleavedPolicy(nullptr, l1ChainConfigs, legalLayouts,
-                                          schedule, usableL1CacheSize);
+  GreedyL1InterleavedPolicy l1InterleavedPolicy(
+      nullptr, l1ChainConfigs, legalLayouts, schedule, usableL1CacheSize);
   OpConfig greedyConfig = l1InterleavedPolicy.getGreedyConfig(opD, opsL1Usage);
 
   // Sanity checks
diff --git a/test/unittests/Optimizer/TestOptimizerOverrides.cpp b/test/unittests/Optimizer/TestOptimizerOverrides.cpp
index 31118262f5..fee0796505 100644
--- a/test/unittests/Optimizer/TestOptimizerOverrides.cpp
+++ b/test/unittests/Optimizer/TestOptimizerOverrides.cpp
@@ -356,9 +356,9 @@ TEST_F(TestOptimizerOverrideHandler, TestSetMemoryLayoutAnalysisPolicy) {
             mlir::tt::MemoryLayoutAnalysisPolicyType::DFSharding);
 
   optimizerOverridesHandler.setMemoryLayoutAnalysisPolicy(
-      mlir::tt::MemoryLayoutAnalysisPolicyType::L1Interleaved);
+      mlir::tt::MemoryLayoutAnalysisPolicyType::GreedyL1Interleaved);
   ASSERT_EQ(optimizerOverridesHandler.getMemoryLayoutAnalysisPolicy(),
-            mlir::tt::MemoryLayoutAnalysisPolicyType::L1Interleaved);
+            mlir::tt::MemoryLayoutAnalysisPolicyType::GreedyL1Interleaved);
 }
 
 // Test the setInputLayoutOverrides method
diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt
index 54a1563c4f..e49dc0d8c9 100644
--- a/third_party/CMakeLists.txt
+++ b/third_party/CMakeLists.txt
@@ -1,6 +1,6 @@
 include(ExternalProject)
 
-set(TT_METAL_VERSION "2f59d5e02ef0da955b99cf0f49da1759c772a67a")
+set(TT_METAL_VERSION "388e187b27f8924ea09512719f666407c40f6bd9")
 
 if ("$ENV{ARCH_NAME}" STREQUAL "grayskull")
   set(ARCH_NAME "grayskull")
@@ -34,6 +34,7 @@ set(TTMETAL_INCLUDE_DIRS
   ${PROJECT_SOURCE_DIR}/third_party/tt-metal/src/tt-metal/.cpmcache/fmt/73b5ec45edbd92babfd91c3777a9e1ab9cac8238/include
   ${PROJECT_SOURCE_DIR}/third_party/tt-metal/src/tt-metal/.cpmcache/magic_enum/1e1af177d4ab0ef660f105434fd1017c4d1f8c17/include/magic_enum
   ${PROJECT_SOURCE_DIR}/third_party/tt-metal/src/tt-metal/.cpmcache/boost_core/e679bef5c160cf29d0f37d549881dc5f5a58c332/include
+  ${PROJECT_SOURCE_DIR}/third_party/tt-metal/src/tt-metal/.cpmcache/json/230202b6f5267cbf0c8e5a2f17301964d95f83ff/include
   PARENT_SCOPE
 )
diff --git a/tools/explorer/CMakeLists.txt b/tools/explorer/CMakeLists.txt
index e0128691a1..3879558545 100644
--- a/tools/explorer/CMakeLists.txt
+++ b/tools/explorer/CMakeLists.txt
@@ -3,7 +3,7 @@ include(ExternalProject)
 
 set(TT_EXPLORER_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/run.py)
 set(TTMLIR_BUILD_BIN_DIR ${TTMLIR_BINARY_DIR}/bin)
 
-set(MODEL_EXPLORER_VERSION "d0b53c3b7049fd41ea1caff193706272c399fac9")
+set(MODEL_EXPLORER_VERSION "ca884d5eb3291507e7f4e76776957e231b2d9b6d")
 ExternalProject_Add(
     model-explorer
     PREFIX ${CMAKE_CURRENT_SOURCE_DIR}/model-explorer
@@ -18,7 +18,9 @@ ExternalProject_Add(
 
 add_custom_target(explorer
   COMMENT "Building tt-explorer... ${TTMLIR_BIN_DIR}"
  COMMAND pip install $<$:-e> ${CMAKE_CURRENT_SOURCE_DIR}/tt_adapter
-  COMMAND pip install ${CMAKE_CURRENT_SOURCE_DIR}/model-explorer/src/model-explorer/src/server/package
+  COMMAND pip uninstall -y ai-edge-model-explorer
+  COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_CURRENT_SOURCE_DIR}/model-explorer/src/model-explorer/src/server/package/build
+  COMMAND pip install --upgrade ${CMAKE_CURRENT_SOURCE_DIR}/model-explorer/src/model-explorer/src/server/package
 
   DEPENDS TTMLIRPythonModules model-explorer ttrt ttmlir-opt ttmlir-translate
 )
diff --git a/tools/explorer/test/run_tests.py b/tools/explorer/test/run_tests.py
index 75925a44e2..485104fbbf 100644
--- a/tools/explorer/test/run_tests.py
+++ b/tools/explorer/test/run_tests.py
@@ -124,7 +124,7 @@ def test_execute_model(model_path):
 
 def test_execute_mnist_l1_interleaved():
     execute_command_and_wait(
         "test/ttmlir/Silicon/TTNN/optimizer/mnist_sharding.mlir",
-        {"optimizationPolicy": "L1 Interleaved"},
+        {"optimizationPolicy": "Greedy L1 Interleaved"},
         timeout=60,
     )
diff --git a/tools/explorer/tt_adapter/src/tt_adapter/main.py b/tools/explorer/tt_adapter/src/tt_adapter/main.py
index 53ea686691..9d0307d113 100644
--- a/tools/explorer/tt_adapter/src/tt_adapter/main.py
+++ b/tools/explorer/tt_adapter/src/tt_adapter/main.py
@@ -10,7 +10,8 @@ class OptimizationPolicy(enum.Enum):
     DFSharding = "DF Sharding"
-    L1Interleaved = "L1 Interleaved"
+    GreedyL1Interleaved = "Greedy L1 Interleaved"
+    BFInterleaved = "BF Interleaved"
     OptimizerDisabled = "Optimizer Disabled"
 
@@ -55,7 +56,9 @@ def convert(
         # Convert TTIR to Model Explorer Graphs and Display/Return
         graph, perf_data = mlir.build_graph(module, perf_trace)
-        return {"graphs": [graph], "perf_data": perf_data}
+        if perf_data:
+            graph = utils.add_to_dataclass(graph, "perf_data", perf_data.graphsData)
+        return {"graphs": [graph]}
 
     def execute(
         self, model_path: str, settings: Dict
diff --git a/tools/explorer/tt_adapter/src/tt_adapter/mlir.py b/tools/explorer/tt_adapter/src/tt_adapter/mlir.py
index 843606b06c..eac036a38e 100644
--- a/tools/explorer/tt_adapter/src/tt_adapter/mlir.py
+++ b/tools/explorer/tt_adapter/src/tt_adapter/mlir.py
@@ -7,20 +7,14 @@
 from model_explorer import graph_builder, node_data_builder
 
 from ttmlir.dialects import tt, ttnn, ttir
-from ttmlir import ir
+from ttmlir import ir, util
 
 
 def get_loc_str(loc):
     try:
-        # Constant loc( at the start of the location and ) at the end. Can just strip these characters
-        loc = str(loc)
-        if loc.startswith("loc(") and loc.endswith(")"):
-            # Fuzzy parse first string inside location
-            # 'loc("matmul_1"("MNISTLinear":4294967295:10))' -> matmul_1
-            # TODO(odjuricic) Need to have this pybinded.
-            res = re.search(r'"([^"]+)"', loc).group(1)
-        else:
-            res = loc  # This is a fallback to just visualize / see what the loc is if not processable.
+        res = util.get_loc_name(loc)
+        if res == "-":
+            res = util.get_loc_full(loc)
     except:
         res = "unknown"
     return res
diff --git a/tools/explorer/tt_adapter/src/tt_adapter/utils.py b/tools/explorer/tt_adapter/src/tt_adapter/utils.py
index 4b404a204b..6cc32c81ee 100644
--- a/tools/explorer/tt_adapter/src/tt_adapter/utils.py
+++ b/tools/explorer/tt_adapter/src/tt_adapter/utils.py
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 import ttmlir
-from dataclasses import make_dataclass
+from dataclasses import make_dataclass, is_dataclass, asdict
 
 
 def parse_mlir_file(model_path):
@@ -18,5 +18,15 @@ def to_dataclass(obj: dict, dc_name: str = "tempClass"):
     return make_dataclass(dc_name, ((k, type(v)) for k, v in obj.items()))(**obj)
 
 
-def to_adapter_format(obj: dict):
-    return {"graphs": [to_dataclass(obj)]}
+def add_to_dataclass(dataclass, new_attr_name: str, new_attr_value):
+    if not is_dataclass(dataclass):
+        return None
+    classname = dataclass.__class__.__name__
+    dataclass = asdict(dataclass)
+    dataclass[new_attr_name] = new_attr_value
+    return to_dataclass(dataclass, dc_name=classname)
+
+
+def to_adapter_format(*objs):
+    res = [x if is_dataclass(x) else to_dataclass(x) for x in objs]
+    return {"graphs": res}
diff --git a/tools/ttnn-standalone/CMakeLists.txt b/tools/ttnn-standalone/CMakeLists.txt
index 23c78c7ca9..0be29d763d 100644
--- a/tools/ttnn-standalone/CMakeLists.txt
+++ b/tools/ttnn-standalone/CMakeLists.txt
@@ -63,7 +63,7 @@ set(INCLUDE_DIRS
     $ENV{TT_METAL_HOME}
     $ENV{TT_METAL_HOME}/tt_metal
     $ENV{TT_METAL_HOME}/tt_metal/third_party/umd
-    $ENV{TT_METAL_HOME}/tt_metal/third_party/umd/device
+    $ENV{TT_METAL_HOME}/tt_metal/third_party/umd/device/api
    $ENV{TT_METAL_HOME}/tt_metal/third_party/fmt
    $ENV{TT_METAL_HOME}/tt_metal/hw/inc
    $ENV{TT_METAL_HOME}/tt_metal/hw/inc/${ARCH_NAME}