Fix embedding tests that were previously failing #1128

Merged · 1 commit · Nov 4, 2024
7 changes: 6 additions & 1 deletion include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
@@ -410,17 +410,22 @@ def TTNN_MaxOp : TTNN_ReductionOp<"max"> {
}];
}

-def TTNN_EmbeddingOp : TTNN_Op<"embedding"> {
+def TTNN_EmbeddingOp : TTNN_NamedDPSOp<"embedding"> {
let summary = "Embedding op.";
let description = [{
Embedding operation.
}];

let arguments = (ins AnyRankedTensor:$input,
+                       AnyRankedTensor:$output,
AnyRankedTensor:$weight);

let results = (outs AnyRankedTensor:$result);

+  let extraClassDeclaration = [{
+    MutableOperandRange getDpsInitsMutable() { return getOutputMutable(); }
+  }];

let hasVerifier = 1;
}

2 changes: 1 addition & 1 deletion include/ttmlir/Target/TTNN/program.fbs
@@ -119,7 +119,7 @@ table ReductionOp {
table EmbeddingOp {
input: tt.target.TensorRef;
weight: tt.target.TensorRef;
-  output: tt.target.TensorRef;
+  out: tt.target.TensorRef;
}

table SoftmaxOp {
4 changes: 2 additions & 2 deletions lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp
@@ -212,7 +212,7 @@ class ToLayoutOpConversionPattern
bool shouldForceRowMajor(ttir::ToLayoutOp op) const {
for (mlir::Operation *user : op.getResult().getUsers()) {
if (isa<ttir::Conv2dOp>(user) || isa<ttir::MaxPool2dOp>(user) ||
-        isa<ttir::SliceOp>(user)) {
+        isa<ttir::SliceOp>(user) || isa<ttir::EmbeddingOp>(user)) {
return true;
}
}
@@ -317,7 +317,7 @@ class EmbeddingOpConversionPattern
ConversionPatternRewriter &rewriter) const override {
rewriter.replaceOpWithNewOp<ttnn::EmbeddingOp>(
op, this->getTypeConverter()->convertType(op.getType()),
-        adaptor.getInput(), adaptor.getWeight());
+        adaptor.getInput(), adaptor.getOutput(), adaptor.getWeight());

return success();
}
10 changes: 6 additions & 4 deletions runtime/lib/ttnn/operations/embedding/embedding.cpp
@@ -18,14 +18,16 @@ void run(const ::tt::target::ttnn::EmbeddingOp *op, ProgramContext &context) {

// default params for embedding op
std::optional<int> padToken = std::nullopt;
-  ::tt::tt_metal::Layout layout = ::ttnn::ROW_MAJOR_LAYOUT;
+  ::tt::tt_metal::Layout layout = utils::isTilized(op->out())
+                                      ? ::ttnn::TILE_LAYOUT
+                                      : ::ttnn::ROW_MAJOR_LAYOUT;
  auto embeddingsType = ::ttnn::operations::embedding::EmbeddingsType::GENERIC;
-  ::ttnn::DataType outputDataType = utils::getDataType(op->output());
+  ::ttnn::DataType outputDataType = utils::getDataType(op->out());
  ::ttnn::MemoryConfig outputMemoryConfig =
-      utils::createMemoryConfig(op->output());
+      utils::createMemoryConfig(op->out());
  ::ttnn::Tensor out =
      ::ttnn::embedding(input, weight, padToken, layout, embeddingsType,
                        outputDataType, outputMemoryConfig);
-  tensorPool.insert_or_assign(op->output()->global_id(), out);
+  tensorPool.insert_or_assign(op->out()->global_id(), out);
Review thread on embedding.cpp, lines 20 to +31:

svuckovicTT (Contributor) commented:

Oh this is an interesting problem we didn't foresee... So in order to supply the destination arg, we need to supply all the other optional parameters?

In the TTNN Defaults Design Doc you mentioned that:

> In TTNN they include all explicit layout info in the parameters, but if the optional dps output tensor is provided, all the explicit parameters get ignored and they extract all information from the output tensor.

Now I understand what you were talking about... We should raise this with them; it seems like bad API design. The dest arg and the other optional args should live in two separate overloads.

@sdjordjevicTT @nsmithtt you guys have regular syncs with ttnn folks? Is this something that we could ask them to take care of?
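
As a rough sketch of the two-overload split being proposed here — hypothetical signatures, not the actual TTNN API; Tensor, DataType, and MemoryConfig stand in for the TTNN types:

#include <optional>

// Hypothetical illustration only, not existing TTNN code.
// Overload 1: the caller describes the desired output via explicit parameters.
Tensor embedding(
    const Tensor& input,
    const Tensor& weight,
    const std::optional<int>& pad_token = std::nullopt,
    const std::optional<DataType>& output_dtype = std::nullopt,
    const std::optional<MemoryConfig>& memory_config = std::nullopt);

// Overload 2 (DPS style): the caller supplies a preallocated dest tensor, and
// dtype, layout, and memory config are all derived from `out`, so the two
// sets of knobs can never conflict.
Tensor embedding(const Tensor& input, const Tensor& weight, Tensor& out);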

jnie-TT (Contributor Author) commented on Nov 4, 2024:

@svuckovicTT yeah exactly. Seems like this is inconsistent as well. Looking at binary.cpp, we ignore memory_config if we pass in the optional output tensor (this is the point I was making in the document):

inline Tensor binary_impl(
    uint8_t queue_id,
    BinaryOpType binary_op_type,
    const ttnn::Tensor &input_tensor,
    const float scalar,
    const std::optional<ttnn::MemoryConfig> &memory_config = std::nullopt,
    const std::optional<Tensor> &optional_output_tensor = std::nullopt) {
    // output_memory_config is extracted from optional_output_tensor when possible
    auto output_memory_config = optional_output_tensor.has_value()
                                    ? optional_output_tensor.value().memory_config()
                                    : memory_config.value_or(input_tensor.memory_config());

However in embedding, we ignore the optional_output_tensor completely:

// optional_output_tensor is unused in this function
struct EmbeddingOperation {
    static inline Tensor invoke(
        uint8_t queue_id,
        const Tensor& input_tensor_arg,
        const Tensor& weight_arg,
        const std::optional<int>& pad_token = std::nullopt,
        const Layout& layout = ttnn::ROW_MAJOR_LAYOUT,
        EmbeddingsType embeddings_type = EmbeddingsType::GENERIC,
        const std::optional<const DataType> dtype = std::nullopt,
        const std::optional<MemoryConfig>& memory_config = std::nullopt,
        std::optional<Tensor> optional_output_tensor = std::nullopt) {
        if (pad_token.has_value()) {
            embeddings_type = EmbeddingsType::PADDED;
        }

        auto hidden_embedding_dim = weight_arg.get_shape()[-1];
        auto padded_hidden_embedding_dim = weight_arg.get_shape().with_tile_padding()[-1];
        auto weight = ttnn::unsqueeze_to_4D(weight_arg);

        auto batch_size = input_tensor_arg.get_shape()[0];
        auto sentence_size = input_tensor_arg.get_shape()[-1];
        auto input_tensor =
            ttnn::reshape(input_tensor_arg, ttnn::SimpleShape{std::array<uint32_t, 4>{batch_size, 1, 1, sentence_size}});

        bool tilized = layout == ttnn::TILE_LAYOUT;
        auto embeddings = operation::run(
                              Embeddings{
                                  .output_mem_config = memory_config.value_or(input_tensor.memory_config()),
                                  .tilized = tilized,
                                  .embeddings_type = embeddings_type,
                                  .pad_token = pad_token,
                                  .output_dtype = dtype.value_or(weight.get_dtype())},
                              {input_tensor, weight})
                              .at(0);
        embeddings = ttnn::reshape(
            embeddings, ttnn::SimpleShape{std::array<uint32_t, 3>{batch_size, sentence_size, hidden_embedding_dim}});
        return embeddings;
    }

nsmithtt (Contributor) commented:

> Now I understand what you were talking about... We should raise this with them; it seems like bad API design. The dest arg and the other optional args should live in two separate overloads.

So my understanding is that you either supply 1 or 2:

  1. Explicit parameters, like output_dtype / output_memory_config / etc.
  2. Output tensor (DPS style)

If you supply 2, then 1 is ignored. That makes sense to me; what are they supposed to do if you supply both?

jnie-TT (Contributor Author) commented on Nov 4, 2024:

@nsmithtt so in eltwise binary it ignores 1. and uses everything in 2. if you provide both.
However, looking at embedding, it completely ignores 2. whether or not you provide it, and uses 1. exclusively.
So it's currently inconsistent across ops. My other question in the document was whether we would want to model our ops like this as well, since the current goal is to match the modelling of TTNN. Currently a lot of our ops (for example eltwise binary) only provide 2. and don't have the option to provide 1. in the tablegen/flatbuffer schemas.

nsmithtt (Contributor) commented:

I think we probably want to model it how TTNN does, although that does feel a bit cumbersome. I'd like to get thoughts from @sdjordjevicTT and @svuckovicTT; I'm not sure how else we could model it, though.

jnie-TT (Contributor Author) commented:

@nsmithtt I agree: it makes the parameters explicit, while the output tensor also needs to carry this information because the consumer op will likely need it. One caveat is that if we ever force anything, we need to update both the op params and the output tensor. We previously hit a compiler issue where, when forcing row_major/tile, we updated the op params but not the tensor info.

svuckovicTT (Contributor) commented:

> So my understanding is that you either supply 1 or 2

AFAIK, in C++, you can't supply just 2: if you want to supply 2, you need to supply 1 as well, given they're both (all) optional. All the non-dest optional parameters come before the optional dest param. This makes it cumbersome to use DPS, because now you need to create placeholder arguments that are (sometimes) not going to be used.

Having 2 separate overloads here is the solution, though I don't expect we'll get that anytime soon, if ever. Can we talk to them and see if they're willing to canonicalize to respecting the dest param's properties? That would save us from having to specialize each op.

Otherwise, I don't see us being able to do anything besides modelling what TTNN does.
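
As a rough sketch of what "canonicalize to respecting the dest param's properties" could look like, following the pattern binary_impl above already uses for memory_config — resolve_output_dtype is a hypothetical helper, not existing TTNN code:

#include <optional>

// Hypothetical helper: if the optional dest tensor is supplied, its
// properties win; otherwise fall back to the explicit parameter, and
// finally to the weight tensor's dtype.
inline DataType resolve_output_dtype(
    const std::optional<Tensor>& optional_output_tensor,
    const std::optional<DataType>& dtype,
    const Tensor& weight) {
    if (optional_output_tensor.has_value()) {
        return optional_output_tensor->get_dtype();  // dest wins when provided
    }
    return dtype.value_or(weight.get_dtype());  // explicit param, else weight
}

Applied uniformly, this would give every op the eltwise-binary behavior described above, and callers passing a dest tensor could always trust its properties.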

nsmithtt (Contributor) commented:

> AFAIK, in C++, you can't supply just 2: if you want to supply 2, you need to supply 1 as well, given they're both (all) optional. All the non-dest optional parameters come before the optional dest param. This makes it cumbersome to use DPS, because now you need to create placeholder arguments that are (sometimes) not going to be used.

Not sure I'm following; if you had the API:

Tensor add(Tensor a, Tensor b, optional<DType> output_dtype = nullopt, optional<Tensor> out = nullopt);

Then for each case we'd do respectively:

  1. add(a, b, DType::Float16, nullopt);
  2. add(a, b, nullopt, out);

svuckovicTT (Contributor) commented:

> Not sure I'm following

Of course, it's because I'm not making any sense... While I was saying optional parameters, I was thinking of default parameters, forgetting that optional params can be explicitly passed as nullopt. Sincere apologies for wasting time!

> I think we probably want to model it how TTNN does, although that does feel a bit cumbersome.

I'm partial to us modelling TTNN accurately as well. It'll simplify runtimes: no need to work out what was supplied and what wasn't, just "copy-paste" parameters from the IR to the API call (with some default constructors for nullopts, etc.). @sdjordjevicTT anything to add?

I was already thinking of adapting an op here and there for the EmitC path, just to make things smoother; wondering if it's okay to roll this out on a per-op, need-to-have basis.

}
} // namespace tt::runtime::ttnn::operations::embedding
@@ -23,6 +23,12 @@ bool isOnDevice(const ::ttnn::Tensor &tensor) {
return tensor.storage_type() == ::tt::tt_metal::StorageType::DEVICE;
}

+bool isTilized(const ::tt::target::TensorRef *tensorRef) {
+  const ::tt::target::Dim2d *tileShape =
+      tensorRef->desc()->layout()->memory_desc()->tile_shape();
+  return tileShape->x() == 32 and tileShape->y() == 32;
+}

::tt::target::MemorySpace
getMemorySpace(const ::tt::target::TensorRef *tensorRef) {
return tensorRef->desc()->layout()->memory_desc()->memory_space();
@@ -17,6 +17,8 @@ bool isOnHost(const ::ttnn::Tensor &tensor);

bool isOnDevice(const ::ttnn::Tensor &tensor);

+bool isTilized(const ::tt::target::TensorRef *tensorRef);

bool inSystemMemory(const ::tt::target::TensorRef *tensorRef);

::tt::target::MemorySpace
8 changes: 4 additions & 4 deletions test/ttmlir/Dialect/TTNN/embedding/embedding_1d_tensor.mlir
@@ -1,10 +1,10 @@
// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline %s | FileCheck %s
#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
module attributes {} {
-  func.func @forward(%arg0: tensor<32xf32>, %arg1: tensor<512x128xf32>) -> tensor<32x128xf32> {
-    %0 = tensor.empty() : tensor<32x128xf32>
+  func.func @forward(%arg0: tensor<32xbf16>, %arg1: tensor<512x128xbf16>) -> tensor<32x128xbf16> {
+    %0 = tensor.empty() : tensor<32x128xbf16>
    // CHECK: %[[C:.*]] = "ttnn.embedding"[[C:.*]]
-    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32xf32>, tensor<512x128xf32>, tensor<32x128xf32>) -> tensor<32x128xf32>
-    return %1 : tensor<32x128xf32>
+    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32xbf16>, tensor<512x128xbf16>, tensor<32x128xbf16>) -> tensor<32x128xbf16>
+    return %1 : tensor<32x128xbf16>
}
}
11 changes: 5 additions & 6 deletions test/ttmlir/Dialect/TTNN/embedding/embedding_non_tile.mlir
@@ -1,12 +1,11 @@
-// RUN: ttmlir-opt --ttir-load-system-desc --ttir-layout --convert-ttir-to-ttnn %s | FileCheck %s
-// UNSUPPORTED: true
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline %s | FileCheck %s
#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
module attributes {} {
-  func.func @forward(%arg0: tensor<1x32xf32>, %arg1: tensor<512x128xf32>) -> tensor<1x32x128xf32> {
+  func.func @forward(%arg0: tensor<1x32xbf16>, %arg1: tensor<512x128xbf16>) -> tensor<1x32x128xbf16> {
    // CHECK: %[[C:.*]] = "ttnn.empty"[[C:.*]]
-    %0 = tensor.empty() : tensor<1x32x128xf32>
+    %0 = tensor.empty() : tensor<1x32x128xbf16>
    // CHECK: %[[C:.*]] = "ttnn.embedding"[[C:.*]]
-    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32xf32>, tensor<512x128xf32>, tensor<1x32x128xf32>) -> tensor<1x32x128xf32>
-    return %1 : tensor<1x32x128xf32>
+    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32xbf16>, tensor<512x128xbf16>, tensor<1x32x128xbf16>) -> tensor<1x32x128xbf16>
+    return %1 : tensor<1x32x128xbf16>
}
}
8 changes: 4 additions & 4 deletions test/ttmlir/Dialect/TTNN/embedding/simple_embedding.mlir
@@ -1,10 +1,10 @@
// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline %s | FileCheck %s
#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
module attributes {} {
-  func.func @forward(%arg0: tensor<32x32xf32>, %arg1: tensor<512x128xf32>) -> tensor<32x32x128xf32> {
-    %0 = tensor.empty() : tensor<32x32x128xf32>
+  func.func @forward(%arg0: tensor<32x32xbf16>, %arg1: tensor<512x128xbf16>) -> tensor<32x32x128xbf16> {
+    %0 = tensor.empty() : tensor<32x32x128xbf16>
    // CHECK: %[[C:.*]] = "ttnn.embedding"[[C:.*]]
-    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x32xf32>, tensor<512x128xf32>, tensor<32x32x128xf32>) -> tensor<32x32x128xf32>
-    return %1 : tensor<32x32x128xf32>
+    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x32xbf16>, tensor<512x128xbf16>, tensor<32x32x128xbf16>) -> tensor<32x32x128xbf16>
+    return %1 : tensor<32x32x128xbf16>
}
}
12 changes: 6 additions & 6 deletions test/ttmlir/Dialect/TTNN/remove_empty_op.mlir
@@ -1,11 +1,11 @@
// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline %s | FileCheck %s
-#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+#any_device_tile = #tt.operand_constraint<dram|l1|tile|any_device_tile>
module attributes {} {
-  func.func @forward(%arg0: tensor<32xf32>, %arg1: tensor<512x128xf32>) -> tensor<32x128xf32> {
+  func.func @forward(%arg0: tensor<4x2x32x32xbf16>) -> tensor<2x4x32x32xbf16> {
    // CHECK-NOT: "ttnn.empty"
-    %0 = tensor.empty() : tensor<32x128xf32>
-    // CHECK: %[[C:.*]] = "ttnn.embedding"[[C:.*]]
-    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32xf32>, tensor<512x128xf32>, tensor<32x128xf32>) -> tensor<32x128xf32>
-    return %1 : tensor<32x128xf32>
+    %0 = tensor.empty() : tensor<2x4x32x32xbf16>
+    // CHECK: %[[C:.*]] = "ttnn.reshape"[[C:.*]]
+    %1 = "ttir.reshape"(%arg0, %0) <{shape = [2: i32, 4: i32, 32: i32, 32: i32] , operand_constraints = [#any_device_tile, #any_device_tile]}> : (tensor<4x2x32x32xbf16>, tensor<2x4x32x32xbf16>) -> tensor<2x4x32x32xbf16>
+    return %1 : tensor<2x4x32x32xbf16>
}
}
9 changes: 4 additions & 5 deletions test/ttmlir/Silicon/TTNN/embedding/embedding_1d_tensor.mlir
@@ -1,14 +1,13 @@
// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir
// RUN: FileCheck %s --input-file=%t.mlir
// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
-// UNSUPPORTED: true
#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
module attributes {} {
-  func.func @forward(%arg0: tensor<32xf32>, %arg1: tensor<512x128xf32>) -> tensor<32x128xf32> {
+  func.func @forward(%arg0: tensor<32xbf16>, %arg1: tensor<512x128xbf16>) -> tensor<32x128xbf16> {
    // CHECK: %[[C:.*]] = "ttnn.empty"[[C:.*]]
-    %0 = tensor.empty() : tensor<32x128xf32>
+    %0 = tensor.empty() : tensor<32x128xbf16>
    // CHECK: %[[C:.*]] = "ttnn.embedding"[[C:.*]]
-    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32xf32>, tensor<512x128xf32>, tensor<32x128xf32>) -> tensor<32x128xf32>
-    return %1 : tensor<32x128xf32>
+    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32xbf16>, tensor<512x128xbf16>, tensor<32x128xbf16>) -> tensor<32x128xbf16>
+    return %1 : tensor<32x128xbf16>
}
}
11 changes: 5 additions & 6 deletions test/ttmlir/Silicon/TTNN/embedding/embedding_non_tile.mlir
@@ -1,14 +1,13 @@
-// RUN: ttmlir-opt --ttir-load-system-desc="path=%system_desc_path%" --ttir-layout --convert-ttir-to-ttnn %s > %t.mlir
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir
// RUN: FileCheck %s --input-file=%t.mlir
// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
-// UNSUPPORTED: true
#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
module attributes {} {
-  func.func @forward(%arg0: tensor<1x32xf32>, %arg1: tensor<512x128xf32>) -> tensor<1x32x128xf32> {
+  func.func @forward(%arg0: tensor<1x32xbf16>, %arg1: tensor<512x128xbf16>) -> tensor<1x32x128xbf16> {
    // CHECK: %[[C:.*]] = "ttnn.empty"[[C:.*]]
-    %0 = tensor.empty() : tensor<1x32x128xf32>
+    %0 = tensor.empty() : tensor<1x32x128xbf16>
    // CHECK: %[[C:.*]] = "ttnn.embedding"[[C:.*]]
-    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32xf32>, tensor<512x128xf32>, tensor<1x32x128xf32>) -> tensor<1x32x128xf32>
-    return %1 : tensor<1x32x128xf32>
+    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32xbf16>, tensor<512x128xbf16>, tensor<1x32x128xbf16>) -> tensor<1x32x128xbf16>
+    return %1 : tensor<1x32x128xbf16>
}
}
9 changes: 4 additions & 5 deletions test/ttmlir/Silicon/TTNN/embedding/simple_embedding.mlir
@@ -1,14 +1,13 @@
// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir
// RUN: FileCheck %s --input-file=%t.mlir
// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
-// UNSUPPORTED: true
#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
module attributes {} {
-  func.func @forward(%arg0: tensor<32x32xf32>, %arg1: tensor<512x128xf32>) -> tensor<32x32x128xf32> {
+  func.func @forward(%arg0: tensor<32x32xbf16>, %arg1: tensor<512x128xbf16>) -> tensor<32x32x128xbf16> {
    // CHECK: %[[C:.*]] = "ttnn.empty"[[C:.*]]
-    %0 = tensor.empty() : tensor<32x32x128xf32>
+    %0 = tensor.empty() : tensor<32x32x128xbf16>
    // CHECK: %[[C:.*]] = "ttnn.embedding"[[C:.*]]
-    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x32xf32>, tensor<512x128xf32>, tensor<32x32x128xf32>) -> tensor<32x32x128xf32>
-    return %1 : tensor<32x32x128xf32>
+    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x32xbf16>, tensor<512x128xbf16>, tensor<32x32x128xbf16>) -> tensor<32x32x128xbf16>
+    return %1 : tensor<32x32x128xbf16>
}
}
13 changes: 13 additions & 0 deletions test/ttmlir/Silicon/TTNN/perf_unit/test_perf_embedding.mlir
@@ -0,0 +1,13 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir
+// RUN: FileCheck %s --input-file=%t.mlir
+// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module attributes {} {
+  func.func @forward(%arg0: tensor<32x32xbf16>, %arg1: tensor<512x128xbf16>) -> tensor<32x32x128xbf16> {
+    // CHECK: %[[C:.*]] = "ttnn.empty"[[C:.*]]
+    %0 = tensor.empty() : tensor<32x32x128xbf16>
+    // CHECK: %[[C:.*]] = "ttnn.embedding"[[C:.*]]
+    %1 = "ttir.embedding"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x32xbf16>, tensor<512x128xbf16>, tensor<32x32x128xbf16>) -> tensor<32x32x128xbf16>
+    return %1 : tensor<32x32x128xbf16>
+  }
+}