Skip to content

Commit

Permalink
Merge branch 'main' into vwells/llvm_helper_transform
Browse files Browse the repository at this point in the history
  • Loading branch information
vwellsTT committed Dec 17, 2024
2 parents f7147fb + 8c37b9d commit 6348c92
Show file tree
Hide file tree
Showing 102 changed files with 939 additions and 507 deletions.
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@
/test/ttmlir/Dialect/TTNN/optimizer/ @nobradovictt @odjuricicTT
/test/ttmlir/Silicon/TTNN/optimizer/ @nobradovictt @odjuricicTT
/test/unittests/Optimizer @nobradovictt @odjuricicTT
/tools/explorer/ @odjuricicTT @nobradovictt @vprajapati-tt
/tools/ @svuckovicTT @mtopalovicTT
/tools/explorer/ @odjuricicTT @nobradovictt @vprajapati-tt
7 changes: 1 addition & 6 deletions docs/src/adding-an-op.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,13 @@ There are many things to break down here, starting from the top:
be critical for modeling buffer allocation / lifetimes. Note the 3rd argument
`AnyRankedTensor:$output`.
- Next we have a list of `arguments`. These arguments consist of a mixture of
`Type`s (i.e. `AnyRankedTensor`) and `Attribute`s (i.e. `TT_OperandConstraintArrayAttr`).
`Type`s (i.e. `AnyRankedTensor`) and `Attribute`s.
[Read more about Types & Attributes
here](https://mlir.llvm.org/docs/DefiningDialects/AttributesAndTypes/#attributes).
- `AnyRankedTensor` is part of a tablegen standard library which type
aliases to MLIR's builtin Tensor type, with the added constraint that the
tensor has a static rank. As much as possible we want to use the builtin
types and infrastructure provided by MLIR.
- `TT_OperandConstraintArrayAttr` is a custom attribute that we have defined
in the [`TT`](./autogen/md/Dialect/TTDialect.md) dialect. This attribute is
used to specify constraints on the
operands of the operation. For example, the `TTIR_MatmulOp` requires that
the input tensors be in tile layout, this attribute captures this constraint.
- Next we have a list of `results` in this case just 1, which aliases the
`output` tensor. One drawback of DPS is that the result tensor and the
output tensor will appear to have different SSA names in the IR, but they
Expand Down
3 changes: 1 addition & 2 deletions docs/src/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,7 @@ module attributes {tt.system_desc = #tt.system_desc<[<#tt.arch<wormhole_b0>, #tt
defines the type of result

- Quotes are added around ttir.multiply since it's part of a
custom dialect, and more custom assembly instructions are
applied to specify operand_constraints.
custom dialect.

- Operations typically have operands (arguments) and results which
are highlighted with %, these results and operands help to show
Expand Down
7 changes: 0 additions & 7 deletions include/ttmlir-c/TTAttrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,6 @@ ttmlirTTIteratorTypeAttrGet(MlirContext ctx, uint32_t iteratorType);
MLIR_CAPI_EXPORTED MlirAttribute ttmlirTTIteratorTypeArrayAttrGet(
MlirContext ctx, uint32_t *iteratorTypes, size_t iteratorTypesSize);

MLIR_CAPI_EXPORTED MlirAttribute
ttmlirTTOperandConstraintAttrGet(MlirContext ctx, uint32_t OperandConstraint);

MLIR_CAPI_EXPORTED MlirAttribute ttmlirTTOperandConstraintArrayAttrGet(
MlirContext ctx, uint32_t *OperandConstraints,
size_t OperandConstraintsSize);

MLIR_CAPI_EXPORTED MlirAttribute ttmlirTTTileSizeAttrGet(MlirContext ctx,
int64_t y, int64_t x);

Expand Down
1 change: 1 addition & 0 deletions include/ttmlir/Bindings/Python/TTMLIRModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ void populateTTNNModule(py::module &m);
void populateOverridesModule(py::module &m);
void populateOptimizerOverridesModule(py::module &m);
void populatePassesModule(py::module &m);
void populateUtilModule(py::module &m);
} // namespace mlir::ttmlir::python

#endif // TTMLIR_BINDINGS_PYTHON_TTMLIRMODULE_H
1 change: 1 addition & 0 deletions include/ttmlir/Dialect/TT/IR/TTOps.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/DestinationStyleOpInterface.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
Expand Down
25 changes: 25 additions & 0 deletions include/ttmlir/Dialect/TT/IR/TTOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,30 @@
#define TTMLIR_TTMLIR_TTOPS_TD

include "ttmlir/Dialect/TT/IR/TTOpsTypes.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/IR/CommonTypeConstraints.td"

// Tuple element extraction op for the TT dialect. `Pure` (no side effects)
// and result type is computed via InferTypeOpInterface, whose methods are
// declared here and implemented in the dialect's C++ sources.
def TT_GetTupleElementOp: TT_Op<"get_tuple_element", [Pure, DeclareOpInterfaceMethods<InferTypeOpInterface>]> {
  let summary = "GetTupleElement operation";
  let description = [{
    Extracts element at `index` position of the `operand` tuple and produces a `result`.

    Example:
    ```mlir
    %result = tt.get_tuple_element %operand[0] : (tuple<tensor<32x32xbf16>, tuple<tensor<1x32xf32>>>) -> tensor<32x32xbf16>
    ```
  }];

  // `operand` is a (possibly nested) tuple of ranked tensors; `index` is a
  // static i32 attribute constrained to be non-negative.
  let arguments = (ins TT_Tuple:$operand,
                       ConfinedAttr<I32Attr, [IntNonNegative]>:$index
  );

  let results = (outs TT_TupleReturnType:$result);

  // Custom assembly: `%operand[index] : functional-type`, matching the
  // example in the description above.
  let assemblyFormat = [{
    $operand `[` $index `]` attr-dict `:` functional-type(operands, results)
  }];
}

#endif
41 changes: 0 additions & 41 deletions include/ttmlir/Dialect/TT/IR/TTOpsEnums.td
Original file line number Diff line number Diff line change
Expand Up @@ -126,47 +126,6 @@ def TT_OOBVal : I32EnumAttr<"OOBVal", "TT OOBVal",
let cppNamespace = "::mlir::tt";
}

def TT_OperandConstraintSystem : I32BitEnumAttrCaseBit<"System", 0, "system">;
def TT_OperandConstraintDRAM : I32BitEnumAttrCaseBit<"DRAM", 1, "dram">;
def TT_OperandConstraintL1 : I32BitEnumAttrCaseBit<"L1", 2, "l1">;
def TT_OperandConstraintScalar : I32BitEnumAttrCaseBit<"Scalar", 3, "scalar">;
def TT_OperandConstraintTile : I32BitEnumAttrCaseBit<"Tile", 4, "tile">;
def TT_OperandConstraintNone : I32BitEnumAttrCaseBit<"None", 5, "none">;
def TT_OperandConstraintInterleaved : I32BitEnumAttrCaseBit<"Interleaved", 6, "interleaved">;
def TT_OperandConstraintSingleBank : I32BitEnumAttrCaseBit<"SingleBank", 7, "single_bank">;
def TT_OperandConstraintHeightSharded : I32BitEnumAttrCaseBit<"HeightSharded", 8, "height_sharded">;
def TT_OperandConstraintWidthSharded : I32BitEnumAttrCaseBit<"WidthSharded", 9, "width_sharded">;
def TT_OperandConstraintBlockSharded : I32BitEnumAttrCaseBit<"BlockSharded", 10, "block_sharded">;
def TT_OperandConstraintSystemScalar : I32BitEnumAttrCaseGroup<"SystemScalar", [TT_OperandConstraintSystem, TT_OperandConstraintScalar], "system_scalar">;
def TT_OperandConstraintAnyLayout : I32BitEnumAttrCaseGroup<"AnyLayout", [TT_OperandConstraintNone, TT_OperandConstraintInterleaved, TT_OperandConstraintSingleBank, TT_OperandConstraintHeightSharded, TT_OperandConstraintWidthSharded, TT_OperandConstraintBlockSharded], "any_layout">;
def TT_OperandConstraintAny : I32BitEnumAttrCaseGroup<"Any", [TT_OperandConstraintSystem, TT_OperandConstraintDRAM, TT_OperandConstraintL1, TT_OperandConstraintScalar, TT_OperandConstraintTile, TT_OperandConstraintAnyLayout], "any">;
def TT_OperandConstraintAnyDevice : I32BitEnumAttrCaseGroup<"AnyDevice", [TT_OperandConstraintDRAM, TT_OperandConstraintL1, TT_OperandConstraintScalar, TT_OperandConstraintTile, TT_OperandConstraintAnyLayout], "any_device">;
def TT_OperandConstraintAnyDeviceTile : I32BitEnumAttrCaseGroup<"AnyDeviceTile", [TT_OperandConstraintDRAM, TT_OperandConstraintL1, TT_OperandConstraintTile, TT_OperandConstraintAnyLayout], "any_device_tile">;
def TT_OperandConstraintL1BlockSharded : I32BitEnumAttrCaseGroup<"L1BlockSharded", [TT_OperandConstraintL1, TT_OperandConstraintScalar, TT_OperandConstraintTile, TT_OperandConstraintBlockSharded], "l1_block_sharded">;
def TT_OperandConstraint : I32BitEnumAttr<"OperandConstraint", "TT Operand Constraints",
[
TT_OperandConstraintSystem,
TT_OperandConstraintDRAM,
TT_OperandConstraintL1,
TT_OperandConstraintScalar,
TT_OperandConstraintTile,
TT_OperandConstraintNone,
TT_OperandConstraintInterleaved,
TT_OperandConstraintSingleBank,
TT_OperandConstraintHeightSharded,
TT_OperandConstraintWidthSharded,
TT_OperandConstraintBlockSharded,
TT_OperandConstraintSystemScalar,
TT_OperandConstraintAnyLayout,
TT_OperandConstraintAny,
TT_OperandConstraintAnyDevice,
TT_OperandConstraintAnyDeviceTile,
TT_OperandConstraintL1BlockSharded,
]> {
let genSpecializedAttr = 0;
let cppNamespace = "::mlir::tt";
}

def TT_ChipCapabilityPCIE : I32BitEnumAttrCaseBit<"PCIE", 0, "pcie">;
def TT_ChipCapabilityHostMMIO : I32BitEnumAttrCaseBit<"HostMMIO", 1, "host_mmio">;

Expand Down
14 changes: 8 additions & 6 deletions include/ttmlir/Dialect/TT/IR/TTOpsTypes.td
Original file line number Diff line number Diff line change
Expand Up @@ -428,12 +428,6 @@ def TT_IteratorTypeAttr : EnumAttr<TT_Dialect, TT_IteratorType, "iterator_type">

def TT_IteratorTypeArrayAttr : TypedArrayAttrBase<TT_IteratorTypeAttr, "">;

def TT_OperandConstraintAttr : EnumAttr<TT_Dialect, TT_OperandConstraint, "operand_constraint"> {
let assemblyFormat = "`<` $value `>`";
}

def TT_OperandConstraintArrayAttr : TypedArrayAttrBase<TT_OperandConstraintAttr, "">;

def TT_ArgumentAllocationAttr : TT_Attr<"ArgumentAllocation", "arg_alloc", []> {
let summary = "Argument allocation attribute in TT dialect";
let description = [{
Expand Down Expand Up @@ -494,4 +488,12 @@ def TT_Device : TT_Type<"Device", "device", []> {
let assemblyFormat = "`<` $desc `>`";
}

//===----------------------------------------------------------------------===//
// Auxiliary type definitions
//===----------------------------------------------------------------------===//

def TT_Tuple : NestedTupleOf<[AnyRankedTensor]>;

def TT_TupleReturnType : AnyTypeOf<[AnyRankedTensor]>;

#endif
95 changes: 0 additions & 95 deletions include/ttmlir/Dialect/TT/Utils/OperandConstraints.h

This file was deleted.

6 changes: 0 additions & 6 deletions include/ttmlir/Dialect/TTIR/IR/TTIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ def TTIR_GenericOp : TTIR_DPSOp<"generic", [AttrSizedOperandSegments]> {
TT_GridAttr:$grid,
AffineMapArrayAttr:$indexing_maps,
TT_IteratorTypeArrayAttr:$iterator_types,
TT_OperandConstraintArrayAttr:$operand_constraints,
DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$operand_cb_mapping); // index of input operand and index of cb go together
let results = (outs Variadic<AnyRankedTensor>:$results);
let regions = (region AnyRegion:$region);
Expand Down Expand Up @@ -126,11 +125,6 @@ def TTIR_ToLayoutOp : TTIR_Op<"to_layout", [DestinationStyleOpInterface, TTIROpI

let extraClassDeclaration = [{
MutableOperandRange getDpsInitsMutable() { return getOutputMutable(); }
ArrayAttr getOperandConstraints() {
return nullptr;
// TODO return below, but we need a way to properly create an ArrayAttr:
// return {OperandConstraint::Any, OperandConstraint::Any};
}

struct CompoundComponents {
bool isLayoutChange;
Expand Down
76 changes: 76 additions & 0 deletions include/ttmlir/Dialect/TTNN/Analysis/BFInterleavedPolicy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
//
// SPDX-License-Identifier: Apache-2.0

#ifndef TTMLIR_DIALECT_TTNN_ANALYSIS_BFINTERLEAVEDPOLICY_H
#define TTMLIR_DIALECT_TTNN_ANALYSIS_BFINTERLEAVEDPOLICY_H

#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h"
#include "ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h"
#include <cstdint>

namespace mlir::tt::ttnn {

// Best-fit interleaved memory-layout policy.
//
// The goal of this policy is to always solve simple fork-joins if that is
// possible. Fork-join is considered to be simple if there is no need for DRAM
// spill in its execution. Furthermore, if DRAM spill is necessary, this policy
// will not produce a globally optimal solution.
//
class BFInterleavedPolicy : public MemoryLayoutAnalysisPolicy {
public:
  // In order to keep track of the L1 memory usage, we have to know two things
  // for each op:
  //    1. The L1 memory usage of each op's output tensor.
  //    2. The number of op's users currently relying on the op's output tensor.
  //       This is important for fork ops where the output tensor is used by
  //       multiple other ops.
  //
  struct OpL1MemUsage {
    uint64_t l1MemUsagePerUser;       // L1 bytes held per unscheduled user.
    uint64_t numOfUnscheduledUsers;   // Users not yet scheduled; when this
                                      // drops to zero the output can be freed.
  };

public:
  // All state (root op, chain configs, legal layouts, schedule, L1 budget) is
  // owned by the MemoryLayoutAnalysisPolicy base; this ctor only forwards.
  BFInterleavedPolicy(
      Operation *rootOp, std::vector<L1ChainConfig> &l1ChainConfigs,
      const llvm::DenseMap<Operation *, std::vector<TTNNLayoutAttr>>
          &legalLayouts,
      llvm::DenseMap<func::FuncOp, llvm::SmallVector<Operation *>> &schedule,
      unsigned usableL1CacheSize)
      : MemoryLayoutAnalysisPolicy(rootOp, l1ChainConfigs, legalLayouts,
                                   schedule, usableL1CacheSize) {}

  // Entry point; runs the analysis. Declared by the base policy interface
  // and implemented in the corresponding .cpp.
  void run() final;

private:
  // Check if the op is analyzable. Op is analyzable if it has at least one
  // legal layout.
  bool isAnalyzable(Operation *op);

  // Iterate over all operands of the op that satisfy the analyzability
  // criterion defined by the isAnalyzable method. This is an abstraction
  // for the boilerplate code used in different places within the policy.
  //
  void walkOnAnalyzableOperands(Operation *op,
                                function_ref<void(Operation *)> callback);

  // Fetch op's DRAM layout from legalLayouts.
  bool hasDRAMBufferType(Operation *op);
  TTNNLayoutAttr getDRAMLayout(Operation *op);

  // Fetch op's L1 Interleaved layout from legalLayouts.
  bool hasL1BufferType(Operation *op);
  TTNNLayoutAttr getL1InterleavedLayout(Operation *op);

  // Returns the portion of usable L1 the policy may fill with tensors.
  // NOTE(review): float multiply implicitly truncates to size_t on return;
  // intentional here, but worth an explicit cast if this ever changes.
  size_t getAvailableL1CacheSize() const {
    // Figure out this const based on exec data, but will be replaced
    // with API.
    //
    constexpr float tensorL1UsageCap = 0.75;
    return tensorL1UsageCap * usableL1CacheSize;
  }
};

} // namespace mlir::tt::ttnn

#endif // TTMLIR_DIALECT_TTNN_ANALYSIS_BFINTERLEAVEDPOLICY_H
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
//
// SPDX-License-Identifier: Apache-2.0

#ifndef TTMLIR_DIALECT_TTNN_ANALYSIS_L1INTERLEAVEDPOLICY_H
#define TTMLIR_DIALECT_TTNN_ANALYSIS_L1INTERLEAVEDPOLICY_H
#ifndef TTMLIR_DIALECT_TTNN_ANALYSIS_GREEDYL1INTERLEAVEDPOLICY_H
#define TTMLIR_DIALECT_TTNN_ANALYSIS_GREEDYL1INTERLEAVEDPOLICY_H

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h"
Expand All @@ -12,7 +12,7 @@

namespace mlir::tt::ttnn {

class L1InterleavedPolicy : public MemoryLayoutAnalysisPolicy {
class GreedyL1InterleavedPolicy : public MemoryLayoutAnalysisPolicy {
public:
struct OpMemSpec {
TTNNLayoutAttr layout;
Expand Down Expand Up @@ -46,7 +46,7 @@ class L1InterleavedPolicy : public MemoryLayoutAnalysisPolicy {
};

public:
L1InterleavedPolicy(
GreedyL1InterleavedPolicy(
Operation *rootOp, std::vector<L1ChainConfig> &l1ChainConfigs,
const llvm::DenseMap<Operation *, std::vector<TTNNLayoutAttr>>
&legalLayouts,
Expand Down Expand Up @@ -124,4 +124,4 @@ class L1InterleavedPolicy : public MemoryLayoutAnalysisPolicy {

} // namespace mlir::tt::ttnn

#endif // TTMLIR_DIALECT_TTNN_ANALYSIS_L1INTERLEAVEDPOLICY_H
#endif // TTMLIR_DIALECT_TTNN_ANALYSIS_GREEDYL1INTERLEAVEDPOLICY_H
Loading

0 comments on commit 6348c92

Please sign in to comment.