From 6ad7dbb8656b64e0145851017b406477791a4639 Mon Sep 17 00:00:00 2001 From: umadevimcw Date: Wed, 24 Jul 2024 11:44:22 +0000 Subject: [PATCH] #10644: remove tensor creation ops --- docs/source/ttnn/ttnn/dependencies/tt_lib.rst | 15 -- models/demos/metal_BERT_large_11/tt/mha.py | 2 +- ...t_for_image_classification_with_teacher.py | 2 +- .../deit/tt/deit_self_attention.py | 2 +- .../distilbert/tt/distilbert_model.py | 3 +- .../tt/distilbert_multihead_self_attention.py | 2 +- .../tt/ttnn_functional_attention.py | 8 +- .../mistral/mistral_helper_funcs.py | 4 +- .../mistral/tt/mistral_attention.py | 8 +- .../mistral/tt/mistral_transformer.py | 2 +- .../nanogpt/tt/nanogpt_attention.py | 4 +- .../experimental/nanogpt/tt/nanogpt_model.py | 2 +- .../experimental/roberta/tt/roberta_model.py | 14 +- .../roberta/tt/roberta_self_attention.py | 2 +- models/experimental/ssd/tt/ssd.py | 3 +- .../stable_diffusion/tt/cross_attention.py | 2 +- .../stable_diffusion/tt/residual_block.py | 2 +- .../experimental/swin/tt/swin_embeddings.py | 2 +- .../swin/tt/swin_self_attention.py | 2 +- .../whisper/tt/whisper_attention.py | 3 +- .../tt/whisper_for_audio_classification.py | 32 +-- .../pytests/tt_dnn/test_composite.py | 8 - .../sweep_tests/tt_lib_ops.py | 104 ++++++++ .../unit_testing/misc/test_concat.py | 3 +- .../unit_testing/misc/test_eps.py | 3 +- .../unit_testing/misc/test_moreh_adamw.py | 3 +- .../unit_testing/misc/test_repeat.py | 3 +- .../unit_tests/operations/test_creation.py | 4 +- .../op_library/composite/composite_ops.cpp | 115 +-------- .../op_library/composite/composite_ops.hpp | 78 ------ .../op_library/optimizer/optimizer_ops.cpp | 6 +- .../op_library/optimizer/optimizer_ops.hpp | 1 - .../tt_lib_bindings_tensor_composite_ops.cpp | 225 ------------------ .../device/binary_backward_op.cpp | 14 +- .../device/unary_backward_op.cpp | 21 +- ttnn/ttnn/operations/creation.py | 28 +-- 36 files changed, 190 insertions(+), 542 deletions(-) diff --git a/docs/source/ttnn/ttnn/dependencies/tt_lib.rst b/docs/source/ttnn/ttnn/dependencies/tt_lib.rst index 82d8e92c54d7..0b1ce63888ff 100644 --- a/docs/source/ttnn/ttnn/dependencies/tt_lib.rst +++ b/docs/source/ttnn/ttnn/dependencies/tt_lib.rst @@ -330,23 +330,8 @@ but in general retaining the data. Tensor creation operations ========================== -.. autofunction:: tt_lib.tensor.arange - -.. autofunction:: tt_lib.tensor.full - -.. autofunction:: tt_lib.tensor.ones - -.. autofunction:: tt_lib.tensor.ones_like - -.. autofunction:: tt_lib.tensor.zeros - -.. autofunction:: tt_lib.tensor.zeros_like - -.. autofunction:: tt_lib.tensor.full_like - .. autofunction:: tt_lib.tensor.split_last_dim_two_chunks_tiled -.. autofunction:: tt_lib.tensor.empty Broadcast and Reduce ==================== diff --git a/models/demos/metal_BERT_large_11/tt/mha.py b/models/demos/metal_BERT_large_11/tt/mha.py index b0e0f0b1d0ac..4fc1cde6620c 100644 --- a/models/demos/metal_BERT_large_11/tt/mha.py +++ b/models/demos/metal_BERT_large_11/tt/mha.py @@ -138,7 +138,7 @@ def op6_unmake_attention_heads(x): def mha_(activation, attention_mask): # TODO: Remove hardcoded shape hack if reserve_split_heads_shape is not None: - temp = tt_lib.tensor.empty( + temp = ttnn.empty( reserve_split_heads_shape, tt_lib.tensor.DataType.BFLOAT16, tt_lib.tensor.Layout.ROW_MAJOR, diff --git a/models/experimental/deit/tt/deit_for_image_classification_with_teacher.py b/models/experimental/deit/tt/deit_for_image_classification_with_teacher.py index 89b3674defb1..edb15e3d6e24 100644 --- a/models/experimental/deit/tt/deit_for_image_classification_with_teacher.py +++ b/models/experimental/deit/tt/deit_for_image_classification_with_teacher.py @@ -76,7 +76,7 @@ def forward( # during inference, return the average of both classifier predictions logits = ttnn.add(cls_logits, distillation_logits) - half = tt_lib.tensor.full(logits.get_legacy_shape(), 0.5) + half = ttnn.full(logits.get_legacy_shape(), 0.5) logits = ttnn.mul(logits, half) # if not return_dict: diff --git a/models/experimental/deit/tt/deit_self_attention.py b/models/experimental/deit/tt/deit_self_attention.py index c4ea6a8c2e90..4934b87b4d7e 100644 --- a/models/experimental/deit/tt/deit_self_attention.py +++ b/models/experimental/deit/tt/deit_self_attention.py @@ -67,7 +67,7 @@ def forward( attention_scores = ttnn.matmul(query_layer, key_layer_transposed) - attention_head_size_tt = tt_lib.tensor.full(attention_scores.get_legacy_shape(), self.attention_head_size) + attention_head_size_tt = ttnn.full(attention_scores.get_legacy_shape(), self.attention_head_size) attention_head_size_tt = ttnn.sqrt(attention_head_size_tt) attention_head_size_tt = ttnn.reciprocal(attention_head_size_tt) diff --git a/models/experimental/distilbert/tt/distilbert_model.py b/models/experimental/distilbert/tt/distilbert_model.py index ca76228c7947..56f43175120b 100644 --- a/models/experimental/distilbert/tt/distilbert_model.py +++ b/models/experimental/distilbert/tt/distilbert_model.py @@ -11,6 +11,7 @@ ) import tt_lib +import ttnn from dataclasses import dataclass from models.experimental.distilbert.tt.distilbert_embedding import TtDistilBert_Embeddings @@ -105,7 +106,7 @@ def forward( if attention_mask is not None: input_shape[0:0] = [1, 1] - attention_mask = tt_lib.tensor.ones(input_shape) + attention_mask = ttnn.ones(input_shape) head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers) """ diff --git a/models/experimental/distilbert/tt/distilbert_multihead_self_attention.py b/models/experimental/distilbert/tt/distilbert_multihead_self_attention.py index d0c69df09b95..384b37bc89bb 100644 --- a/models/experimental/distilbert/tt/distilbert_multihead_self_attention.py +++ b/models/experimental/distilbert/tt/distilbert_multihead_self_attention.py @@ -65,7 +65,7 @@ def __init__(self, config, state_dict=None, base_address="", device=None): self.attention_head_size = self.dim // self.n_heads def const_tensor(self, shape: List[int], value: int) -> tt_lib.tensor.Tensor: - return tt_lib.tensor.full(shape, value) + return ttnn.full(shape, value) def get_min(self, tensor: tt_lib.tensor.Tensor): tensor = tt_to_torch_tensor(tensor) diff --git a/models/experimental/functional_mistral/tt/ttnn_functional_attention.py b/models/experimental/functional_mistral/tt/ttnn_functional_attention.py index 25478e281dde..d644bfd30054 100644 --- a/models/experimental/functional_mistral/tt/ttnn_functional_attention.py +++ b/models/experimental/functional_mistral/tt/ttnn_functional_attention.py @@ -95,18 +95,18 @@ def attention(config, x, bcast_freq_xq, bcast_freq_xk, positions, mask, seqlen, scatter_pos = scatter_pos.to(torch.int64) scatter_pos = scatter_pos.repeat(bsz, 1, config.n_kv_heads, config.head_dim) - cache_k = tt_lib.tensor.empty( + cache_k = ttnn.empty( [config.max_batch_size, config.sliding_window, config.n_kv_heads, config.head_dim], layout=tt_lib.tensor.Layout.ROW_MAJOR, device=device, - output_mem_config=config.out_mem_config, + memory_config=config.out_mem_config, ) cache_k = tt_to_torch_tensor(cache_k).to(torch.float32) - cache_v = tt_lib.tensor.empty( + cache_v = ttnn.empty( [config.max_batch_size, config.sliding_window, config.n_kv_heads, config.head_dim], layout=tt_lib.tensor.Layout.ROW_MAJOR, device=device, - output_mem_config=config.out_mem_config, + memory_config=config.out_mem_config, ) cache_v = tt_to_torch_tensor(cache_v).to(torch.float32) cache_k[:bsz].scatter_(dim=1, index=scatter_pos, src=xk[:, -config.sliding_window :]) diff --git a/models/experimental/mistral/mistral_helper_funcs.py b/models/experimental/mistral/mistral_helper_funcs.py index 48e1d9e467b9..25248546229c 100644 --- a/models/experimental/mistral/mistral_helper_funcs.py +++ b/models/experimental/mistral/mistral_helper_funcs.py @@ -113,7 +113,7 @@ def get_freqs_cis(freqs_cis: torch.Tensor, query_shape, key_shape, device=None, BCH = tt_lib.tensor.BcastOpDim.HW BCMUL = tt_lib.tensor.BcastOpMath.MUL - t_one_xq = tt_lib.tensor.ones(query_shape, output_mem_config=mem_config) + t_one_xq = ttnn.ones(query_shape, memory_config=mem_config) t_one_xq = ttnn.permute(t_one_xq, (3, 1, 2, 0), memory_config=mem_config) freqs_real = ttnn.permute(freqs_cis.real, (3, 1, 2, 0), memory_config=mem_config) @@ -130,7 +130,7 @@ def get_freqs_cis(freqs_cis: torch.Tensor, query_shape, key_shape, device=None, bcast_freq_re_xq.deallocate() bcast_freq_im_xq.deallocate() - t_one_xk = tt_lib.tensor.ones(key_shape, output_mem_config=mem_config) + t_one_xk = ttnn.ones(key_shape, memory_config=mem_config) t_one_xk = ttnn.permute(t_one_xk, (3, 1, 2, 0), memory_config=mem_config) bcast_freq_re_xk = tt_lib.tensor.bcast(t_one_xk, freqs_real, BCMUL, BCH, output_mem_config=mem_config) diff --git a/models/experimental/mistral/tt/mistral_attention.py b/models/experimental/mistral/tt/mistral_attention.py index dfcf681c8eaf..79de8465ba19 100644 --- a/models/experimental/mistral/tt/mistral_attention.py +++ b/models/experimental/mistral/tt/mistral_attention.py @@ -87,18 +87,18 @@ def __init__( ) self.cache_v = torch.empty(args.max_batch_size, args.sliding_window, self.n_kv_heads, self.args.head_dim) else: - cache_k = tt_lib.tensor.empty( + cache_k = ttnn.empty( [args.max_batch_size, args.sliding_window, self.n_kv_heads, self.args.head_dim], layout=tt_lib.tensor.Layout.ROW_MAJOR, device=self.device, - output_mem_config=self.args.out_mem_config, + memory_config=self.args.out_mem_config, ) self.cache_k = tt_to_torch_tensor(cache_k).to(torch.float32) - cache_v = tt_lib.tensor.empty( + cache_v = ttnn.empty( [args.max_batch_size, args.sliding_window, self.n_kv_heads, self.args.head_dim], layout=tt_lib.tensor.Layout.ROW_MAJOR, device=self.device, - output_mem_config=self.args.out_mem_config, + memory_config=self.args.out_mem_config, ) self.cache_v = tt_to_torch_tensor(cache_v).to(torch.float32) diff --git a/models/experimental/mistral/tt/mistral_transformer.py b/models/experimental/mistral/tt/mistral_transformer.py index 57dc7d489bcb..cde66cedbe89 100644 --- a/models/experimental/mistral/tt/mistral_transformer.py +++ b/models/experimental/mistral/tt/mistral_transformer.py @@ -97,7 +97,7 @@ def forward( mask: Optional[torch.Tensor] = None if input_ids.get_legacy_shape()[-1] > 1: seqlen = input_ids.get_legacy_shape()[-1] - tensor = tt_lib.tensor.full( + tensor = ttnn.full( (1, 1, seqlen, seqlen), fill_value=1.0, ) diff --git a/models/experimental/nanogpt/tt/nanogpt_attention.py b/models/experimental/nanogpt/tt/nanogpt_attention.py index 54723fbb97ba..f7c4ba735ebe 100644 --- a/models/experimental/nanogpt/tt/nanogpt_attention.py +++ b/models/experimental/nanogpt/tt/nanogpt_attention.py @@ -48,7 +48,7 @@ def __init__(self, config, base_address, device, tt_cache_path, dtype): self.n_head = self.config.n_head self.n_embd = self.config.n_embd - temp_bias = ttnn.tril(tt_lib.tensor.ones([1, 1, self.block_size, self.block_size])) + temp_bias = ttnn.tril(ttnn.ones([1, 1, self.block_size, self.block_size])) temp_bias = tt_to_torch_tensor(temp_bias) self.register_buffer( "bias", @@ -69,7 +69,7 @@ def __init__(self, config, base_address, device, tt_cache_path, dtype): ) def const_tensor(self, shape, value): - return tt_lib.tensor.full(shape, value) + return ttnn.full(shape, value) def forward(self, x: tt_lib.tensor.Tensor) -> tt_lib.tensor.Tensor: ( diff --git a/models/experimental/nanogpt/tt/nanogpt_model.py b/models/experimental/nanogpt/tt/nanogpt_model.py index 2c71167357f6..37aed3d55cd9 100644 --- a/models/experimental/nanogpt/tt/nanogpt_model.py +++ b/models/experimental/nanogpt/tt/nanogpt_model.py @@ -67,7 +67,7 @@ def forward(self, idx: torch.Tensor) -> tt_lib.tensor.Tensor: assert ( t <= self.config.block_size ), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}" - pos = tt_lib.tensor.arange(0, t, 1) + pos = ttnn.arange(0, t, 1) pos = tt_to_torch_tensor(pos) pos = pos.squeeze(0).squeeze(0) pos = pos.to(dtype=torch.int64) diff --git a/models/experimental/roberta/tt/roberta_model.py b/models/experimental/roberta/tt/roberta_model.py index 42cd00e53c33..836d2aada772 100644 --- a/models/experimental/roberta/tt/roberta_model.py +++ b/models/experimental/roberta/tt/roberta_model.py @@ -10,7 +10,7 @@ from typing import Optional, Tuple, Union, List import tt_lib - +import ttnn from models.experimental.roberta.tt.roberta_encoder import TtRobertaEncoder from models.experimental.roberta.tt.roberta_pooler import TtRobertaPooler from models.experimental.roberta.tt.roberta_embeddings import PytorchEmbeddings @@ -170,8 +170,8 @@ def get_extended_attention_mask( # positions we want to attend and the dtype's smallest value for masked positions. # Since we are adding it to the raw scores before the softmax, this is # effectively the same as removing these entirely. - self.ones_const = tt_lib.tensor.full(extended_attention_mask.get_legacy_shape(), 1.0) - self.mul_const = tt_lib.tensor.full(extended_attention_mask.get_legacy_shape(), self.dtype_min_const) + self.ones_const = ttnn.full(extended_attention_mask.get_legacy_shape(), 1.0) + self.mul_const = ttnn.full(extended_attention_mask.get_legacy_shape(), self.dtype_min_const) extended_attention_mask = ttnn.sub(self.ones_const, extended_attention_mask, memory_config=self.mem_config) extended_attention_mask = ttnn.mul(extended_attention_mask, self.mul_const, memory_config=self.mem_config) @@ -196,8 +196,8 @@ def invert_attention_mask(self, encoder_attention_mask: tt_lib.tensor.Tensor) -> encoder_extended_attention_mask = torch2tt_tensor(torch_encoder_extended_attention_mask, self.device) - self.ones_const = tt_lib.tensor.full(encoder_extended_attention_mask.get_legacy_shape(), 1.0) - self.mul_const = tt_lib.tensor.full(encoder_extended_attention_mask.get_legacy_shape(), self.dtype_min_const) + self.ones_const = ttnn.full(encoder_extended_attention_mask.get_legacy_shape(), 1.0) + self.mul_const = ttnn.full(encoder_extended_attention_mask.get_legacy_shape(), self.dtype_min_const) encoder_extended_attention_mask = ttnn.sub( self.ones_const, @@ -339,7 +339,7 @@ def forward( past_key_values_length = past_key_values[0][0].get_legacy_shape()[2] if past_key_values is not None else 0 if attention_mask is None: - attention_mask = tt_lib.tensor.full((1, 1, batch_size, seq_length + past_key_values_length), 0.0) + attention_mask = ttnn.full((1, 1, batch_size, seq_length + past_key_values_length), 0.0) if token_type_ids is None: if hasattr(self.embeddings, "token_type_ids"): @@ -364,7 +364,7 @@ def forward( ) = encoder_hidden_states.get_legacy_shape() encoder_hidden_shape = (1, 1, encoder_batch_size, encoder_sequence_length) if encoder_attention_mask is None: - encoder_attention_mask = tt_lib.tensor.full(encoder_hidden_shape, 1.1) + encoder_attention_mask = ttnn.full(encoder_hidden_shape, 1.1) encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask) else: encoder_extended_attention_mask = None diff --git a/models/experimental/roberta/tt/roberta_self_attention.py b/models/experimental/roberta/tt/roberta_self_attention.py index 1068c0aa5985..a220aa2f88d0 100644 --- a/models/experimental/roberta/tt/roberta_self_attention.py +++ b/models/experimental/roberta/tt/roberta_self_attention.py @@ -184,7 +184,7 @@ def forward( # back to tt attention_scores = torch2tt_tensor(attention_scores, self.device) - div_const = tt_lib.tensor.full( + div_const = ttnn.full( attention_scores.get_legacy_shape(), 1.0 / math.sqrt(self.attention_head_size), ) diff --git a/models/experimental/ssd/tt/ssd.py b/models/experimental/ssd/tt/ssd.py index c764469aa052..44b3061961b3 100644 --- a/models/experimental/ssd/tt/ssd.py +++ b/models/experimental/ssd/tt/ssd.py @@ -5,6 +5,7 @@ import torch from torch import nn import tt_lib +import ttnn import tt_lib.fallback_ops as fallback_ops from typing import List, Optional, Tuple, Dict, OrderedDict @@ -102,7 +103,7 @@ def __init__( def get_in_channels(self, backbone: TtSSDLiteFeatureExtractorMobileNet): size = (320, 320) - temporary_image = tt_lib.tensor.ones([1, 3, size[1], size[0]], device=self.device) + temporary_image = ttnn.ones([1, 3, size[1], size[0]], device=self.device) backbone.eval() features = backbone(temporary_image) out_channels = [tensor.get_legacy_shape()[1] for i, tensor in features.items()] diff --git a/models/experimental/stable_diffusion/tt/cross_attention.py b/models/experimental/stable_diffusion/tt/cross_attention.py index f21c6525c2e0..772c73ef8e9c 100644 --- a/models/experimental/stable_diffusion/tt/cross_attention.py +++ b/models/experimental/stable_diffusion/tt/cross_attention.py @@ -186,7 +186,7 @@ def get_attention_scores( # self.scale, # self.scale) - scale_tensor = ttl.tensor.full(temp.get_legacy_shape(), self.scale) + scale_tensor = ttnn.full(temp.get_legacy_shape(), self.scale) attention_scores = ttnn.mul(scale_tensor, temp) if attention_mask is not None: diff --git a/models/experimental/stable_diffusion/tt/residual_block.py b/models/experimental/stable_diffusion/tt/residual_block.py index 71d2df5e7fdb..4ab373a8b476 100644 --- a/models/experimental/stable_diffusion/tt/residual_block.py +++ b/models/experimental/stable_diffusion/tt/residual_block.py @@ -205,7 +205,7 @@ def forward(self, input_tensor: ttl.tensor.Tensor, temb: ttl.tensor.Tensor) -> t # create a tensor of size output_scale_factor output_sc_recip = 1 / self.output_scale_factor - output_sc_recip = ttl.tensor.full(input_tensor.get_legacy_shape(), output_sc_recip) + output_sc_recip = ttnn.full(input_tensor.get_legacy_shape(), output_sc_recip) output_tensor = ttnn.add(input_tensor, hidden_states) output_tensor = ttnn.mul(output_tensor, output_sc_recip) diff --git a/models/experimental/swin/tt/swin_embeddings.py b/models/experimental/swin/tt/swin_embeddings.py index 69dcf8c62181..a60a222e35c7 100644 --- a/models/experimental/swin/tt/swin_embeddings.py +++ b/models/experimental/swin/tt/swin_embeddings.py @@ -44,7 +44,7 @@ def __init__(self, config, state_dict, base_address, device, use_mask_token=Fals self.norm = fallback_ops.LayerNorm(gamma, beta, normalized_shape=config.embed_dim, eps=config.layer_norm_eps) def const_tensor(self, shape, value): - return tt_lib.tensor.full(shape, value) + return ttnn.full(shape, value) def forward( self, diff --git a/models/experimental/swin/tt/swin_self_attention.py b/models/experimental/swin/tt/swin_self_attention.py index 3d9e98505f38..a0355b8c1af9 100644 --- a/models/experimental/swin/tt/swin_self_attention.py +++ b/models/experimental/swin/tt/swin_self_attention.py @@ -70,7 +70,7 @@ def __init__( self.value_bias = torch_to_tt_tensor_rm(state_dict[f"{base_address}.value.bias"], self.device) def const_tensor(self, shape, value): - return tt_lib.tensor.full(shape, value) + return ttnn.full(shape, value) def transpose_for_scores(self, x: tt_lib.tensor.Tensor) -> tt_lib.tensor.Tensor: # x must be 4d originaly diff --git a/models/experimental/whisper/tt/whisper_attention.py b/models/experimental/whisper/tt/whisper_attention.py index cfc694fe1702..444c86319d8a 100644 --- a/models/experimental/whisper/tt/whisper_attention.py +++ b/models/experimental/whisper/tt/whisper_attention.py @@ -5,6 +5,7 @@ import torch import torch.nn as nn import tt_lib +import ttnn from typing import Optional, Tuple, Union from models.utility_functions import torch2tt_tensor, tt2torch_tensor @@ -114,7 +115,7 @@ def forward( if q_proj_shape == self.cached_q_proj_shape: q_proj_mul_const = self.q_proj_mul_const else: - self.q_proj_mul_const = tt_lib.tensor.full(q_proj_shape, self.scaling) + self.q_proj_mul_const = ttnn.full(q_proj_shape, self.scaling) self.cached_q_proj_shape = q_proj_shape q_proj_mul_const = self.q_proj_mul_const diff --git a/models/experimental/whisper/tt/whisper_for_audio_classification.py b/models/experimental/whisper/tt/whisper_for_audio_classification.py index a7f946806d13..65d7dcebaf75 100644 --- a/models/experimental/whisper/tt/whisper_for_audio_classification.py +++ b/models/experimental/whisper/tt/whisper_for_audio_classification.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import tt_lib +import ttnn import torch import torch.nn as nn from dataclasses import dataclass @@ -14,6 +15,7 @@ from models.experimental.whisper.tt.whisper_encoder import TtWhisperEncoder + @dataclass class TtWhisperForAudioClassificationOutput: loss: Optional[tt_lib.tensor.Tensor] = None @@ -37,16 +39,12 @@ def __init__(self, state_dict, device, config): config=config, ) - num_layers = ( - config.num_hidden_layers + 1 - ) # transformer layers + input embeddings + num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings if config.use_weighted_layer_sum: # Not using this parameter for now N, C, H, W = 1, 1, 1, num_layers weight_init_const = 1.0 / num_layers - self.layer_weights = tt_lib.tensor.full( - (1, 1, 1, num_layers), weight_init_const - ) + self.layer_weights = ttnn.full((1, 1, 1, num_layers), weight_init_const) self.projector_weight = torch2tt_tensor( state_dict[f"projector.weight"], self.device, tt_lib.tensor.Layout.ROW_MAJOR @@ -122,19 +120,11 @@ def forward( 'af_za' ```""" - output_attentions = ( - output_attentions - if output_attentions is not None - else self.config.output_attentions - ) + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( - output_hidden_states - if output_hidden_states is not None - else self.config.output_hidden_states - ) - return_dict = ( - return_dict if return_dict is not None else self.config.use_return_dict + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict if encoder_outputs is None: encoder_outputs = self.encoder( @@ -160,17 +150,13 @@ def forward( hidden_states = encoder_outputs.last_hidden_state # Apply Linear layer - hidden_states = linear( - hidden_states, self.projector_weight, self.projector_bias - ) + hidden_states = linear(hidden_states, self.projector_weight, self.projector_bias) # Torch mean torch_hidden_states = tt2torch_tensor(hidden_states) torch_pooled_output = torch_hidden_states.mean(dim=-2) # If something changes these dimension -2 should always work - pooled_output = torch2tt_tensor( - torch_pooled_output, self.device, tt_lib.tensor.Layout.ROW_MAJOR - ) + pooled_output = torch2tt_tensor(torch_pooled_output, self.device, tt_lib.tensor.Layout.ROW_MAJOR) # Apply classifier layer logits = linear(pooled_output, self.classifier_weight, self.classifier_bias) diff --git a/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_composite.py b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_composite.py index 40db7882e71f..d669f7595e17 100644 --- a/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_composite.py +++ b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_composite.py @@ -74,14 +74,6 @@ def custom_compare(*args, **kwargs): "hypot", "hardswish", "hardsigmoid", - "ones_like", - "zeros_like", - "full_like", - "ones", - "empty", - "zeros", - "full", - "arange", "hardshrink", "softshrink", "sinh", diff --git a/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py b/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py index e1e8890b9b99..64d5a5f8919e 100644 --- a/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py +++ b/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py @@ -1166,6 +1166,48 @@ def eltwise_unary_lt( return tt2torch_tensor(t1) +@setup_host_and_device +def full_like( + x, + *args, + scalar, + device, + dtype, + layout, + input_mem_config, + output_mem_config, + **kwargs, +): + t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0]) + t1 = ttnn.full_like(t0, scalar, memory_config=output_mem_config) + + return tt2torch_tensor(t1) + + +@setup_host_and_device +def ones(x, *args, device, dtype, layout, input_mem_config, output_mem_config, **kwargs): + t1 = ttnn.ones( + x.shape, + layout=layout[0], + device=device if input_mem_config[0] is not None else None, + memory_config=output_mem_config, + ) + + return tt2torch_tensor(t1) + + +@setup_host_and_device +def zeros(x, *args, device, dtype, layout, input_mem_config, output_mem_config, **kwargs): + t1 = ttnn.zeros( + x.shape, + layout=layout[0], + device=device if input_mem_config[0] is not None else None, + memory_config=output_mem_config, + ) + + return tt2torch_tensor(t1) + + @setup_host_and_device def triu(x, *args, device, dtype, layout, input_mem_config, output_mem_config, **kwargs): tx = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0]) @@ -1182,6 +1224,41 @@ def tril(x, *args, device, dtype, layout, input_mem_config, output_mem_config, * return tt2torch_tensor(t1) +@setup_host_and_device +def empty(x, *args, device, dtype, layout, input_mem_config, output_mem_config, **kwargs): + t1 = ttnn.empty( + x.shape, + layout=layout[0], + device=device if input_mem_config[0] is not None else None, + memory_config=output_mem_config, + ) + + return tt2torch_tensor(t1) + + +@setup_host_and_device +def full( + x, + *args, + scalar, + device, + dtype, + layout, + input_mem_config, + output_mem_config, + **kwargs, +): + t1 = ttnn.full( + x.shape, + scalar, + layout=layout[0], + device=device if input_mem_config[0] is not None else None, + memory_config=output_mem_config, + ) + + return tt2torch_tensor(t1) + + @setup_host_and_device def fill_rm( x, @@ -1242,6 +1319,31 @@ def fill_ones_rm( return tt2torch_tensor(t1) +@setup_host_and_device +def arange( + x, + *args, + start, + end, + step=1, + device, + dtype, + layout, + input_mem_config, + output_mem_config, + **kwargs, +): + t1 = ttnn.arange( + start, + end, + step, + device=device if input_mem_config[0] is not None else None, + memory_config=output_mem_config, + ) + + return tt2torch_tensor(t1) + + @setup_host_and_device def prod( x, @@ -2213,6 +2315,8 @@ def unary_op_optional_output_with_fast_approx( eltwise_nez = make_unary_op_optional_output(ttnn.nez) eltwise_eqz = make_unary_op_optional_output(ttnn.eqz) eltwise_assign_unary = make_unary_op(ttl.tensor.assign) +zeros_like = make_ttnn_unary_op(ttnn.zeros_like) +ones_like = make_ttnn_unary_op(ttnn.ones_like) # eltwise_logical_not = make_unary_op(ttl.tensor.logical_not) transpose_wh = make_unary_op(partial(ttl.tensor.transpose, dim0=-2, dim1=-1)) transpose_hc = make_unary_op(partial(ttl.tensor.transpose, dim0=1, dim1=-2)) diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_concat.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_concat.py index 622bc32b6249..7f7dc404bf0e 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_concat.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_concat.py @@ -9,6 +9,7 @@ import torch import tt_lib as ttl +import ttnn from models.utility_functions import print_diff_argmax import pytest from loguru import logger @@ -164,7 +165,7 @@ def test_concat_with_program_cache( shapes, dim, device, layout, dtype, input_mem_config, output_mem_config, use_program_cache, function_level_defaults ): run_concat(shapes, dim, device, layout, dtype, input_mem_config, output_mem_config) - tmp = ttl.tensor.empty([1, 256, 32, 32], ttl.tensor.DataType.BFLOAT16, ttl.tensor.Layout.TILE, device) + tmp = ttnn.empty([1, 256, 32, 32], ttl.tensor.DataType.BFLOAT16, ttl.tensor.Layout.TILE, device) run_concat(shapes, dim, device, layout, dtype, input_mem_config, output_mem_config) diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py index fd1f4140fe7c..9c188bfb24f0 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py @@ -7,6 +7,7 @@ import numpy as np import tt_lib as ttl +import ttnn from models.utility_functions import is_wormhole_b0 from ttnn.device import Arch @@ -31,7 +32,7 @@ def test_run_sfpu_eps(device): def test_run_sfpu_tensor(device): value = device.sfpu_eps() shape = [1, 1, 32, 32] - eps = ttl.tensor.sfpu_eps(ttl.tensor.Shape(shape), ttl.tensor.Layout.ROW_MAJOR, device) + eps = ttnn.full(ttl.tensor.Shape(shape), value) eps = eps.cpu().to(ttl.tensor.Layout.ROW_MAJOR).to_torch() passing = np.isclose(np.ones((1, 1, 32, 32)) * value, eps.float()).all() assert passing diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_moreh_adamw.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_moreh_adamw.py index ff4f9c5a2dc5..db978c6e11ec 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_moreh_adamw.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_moreh_adamw.py @@ -7,6 +7,7 @@ import torch.optim as optim import tt_lib as ttl +import ttnn import pytest from models.utility_functions import ( skip_for_wormhole_b0, @@ -35,7 +36,7 @@ def create_tt_tensor(x, device): return ret def create_empty_tensor(x, device): - ret = ttl.tensor.empty(x.shape, ttl.tensor.DataType.BFLOAT16, ttl.tensor.Layout.TILE, device) + ret = ttnn.empty(x.shape, ttl.tensor.DataType.BFLOAT16, ttl.tensor.Layout.TILE, device) return ret # input tensors diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_repeat.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_repeat.py index 325a84ceac7b..cc40d6e39a92 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_repeat.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_repeat.py @@ -9,6 +9,7 @@ import torch import tt_lib as ttl +import ttnn from models.utility_functions import print_diff_argmax import pytest from loguru import logger @@ -155,5 +156,5 @@ def test_repeat_with_program_cache( function_level_defaults, ): run_repeat(input_shape, repeats, device, layout, dtype, input_mem_config, output_mem_config) - tmp = ttl.tensor.empty([1, 256, 32, 32], ttl.tensor.DataType.BFLOAT16, ttl.tensor.Layout.TILE, device) + tmp = ttnn.empty([1, 256, 32, 32], ttl.tensor.DataType.BFLOAT16, ttl.tensor.Layout.TILE, device) run_repeat(input_shape, repeats, device, layout, dtype, input_mem_config, output_mem_config) diff --git a/tests/ttnn/unit_tests/operations/test_creation.py b/tests/ttnn/unit_tests/operations/test_creation.py index 92282e9aa948..1aff7f0e63e3 100644 --- a/tests/ttnn/unit_tests/operations/test_creation.py +++ b/tests/ttnn/unit_tests/operations/test_creation.py @@ -157,7 +157,9 @@ def test_arange(device, start, end, step): input_tensor = ttnn.from_torch(torch_input_tensor, layout=ttnn.TILE_LAYOUT) input_tensor = ttnn.to_device(input_tensor, device) - output_tensor = ttnn.arange(input_tensor.shape[0], input_tensor.shape[1], input_tensor.shape[2], device) + output_tensor = ttnn.arange( + input_tensor.shape[0], input_tensor.shape[1], input_tensor.shape[2], ttnn.bfloat16, device + ) output_tensor = ttnn.to_layout(output_tensor, ttnn.ROW_MAJOR_LAYOUT) output_tensor = ttnn.from_device(output_tensor) output_tensor = ttnn.to_torch(output_tensor) diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.cpp b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.cpp index 2d2f9ddbdcac..ec42fc7f4b1d 100644 --- a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.cpp +++ b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.cpp @@ -198,7 +198,7 @@ Tensor mac(const Tensor& input_a, float b, float c, const MemoryConfig& output_m Tensor _logical_andi(const Tensor& input_a, float immediate, const MemoryConfig& output_mem_config) { if (std::fpclassify(immediate) == FP_ZERO) { - return full_like(input_a, immediate, output_mem_config); + return ttnn::full_like(input_a, immediate); } else { return ttnn::nez(input_a); } @@ -244,7 +244,7 @@ Tensor _logical_ori(const Tensor& input_a, float immediate, const MemoryConfig& if (std::fpclassify(immediate) == FP_ZERO) { return ttnn::nez(input_a, output_mem_config); } else { - return full_like(input_a, 1, output_mem_config); + return ttnn::full_like(input_a, 1); } } Tensor logical_ori(const Tensor& input_a, float immediate, const MemoryConfig& output_mem_config) { @@ -252,7 +252,7 @@ Tensor logical_ori(const Tensor& input_a, float immediate, const MemoryConfig& o } Tensor _logical_noti(const Tensor& input_a, float immediate, const MemoryConfig& output_mem_config) { - Tensor t_imm = full_like(input_a, immediate, output_mem_config); + Tensor t_imm = ttnn::full_like(input_a, immediate); Tensor result = ttnn::logical_not(t_imm, output_mem_config); return result; } @@ -274,8 +274,8 @@ Tensor _div(const Tensor& input_a, const Tensor& input_b, bool accurate_mode, st return result; } - Tensor t_inf = full_like(input_a, std::numeric_limits::infinity(), output_mem_config); - Tensor t_nan = full_like(input_a, std::nanf(""), output_mem_config); + Tensor t_inf = ttnn::full_like(input_a, std::numeric_limits::infinity()); + Tensor t_nan = ttnn::full_like(input_a, std::nanf("")); return ttnn::where( ttnn::eqz(input_b, output_mem_config), ttnn::where( @@ -393,8 +393,8 @@ Tensor floor_div(const Tensor& input_a, const Tensor& input_b, const MemoryConfi Tensor _floor_div_overload(const Tensor& input, float value, const MemoryConfig& output_mem_config) { if (value == 0) { - Tensor t_inf = full_like(input, std::numeric_limits::infinity(), output_mem_config); - Tensor t_nan = full_like(input, std::nanf(""), output_mem_config); + Tensor t_inf = ttnn::full_like(input, std::numeric_limits::infinity()); + Tensor t_nan = ttnn::full_like(input, std::nanf("")); return ttnn::where( ttnn::eqz(input, output_mem_config), t_nan, @@ -427,7 +427,7 @@ Tensor div_no_nan(const Tensor& input_a, const Tensor& input_b, const MemoryConf Tensor _div_no_nan_overload(const Tensor& input_a, float value, const MemoryConfig& output_mem_config) { if (value == 0) - return full_like(input_a, 0.0f, output_mem_config); + return ttnn::full_like(input_a, 0.0f); else return ttnn::multiply(input_a, (1.0f/value)); } @@ -442,7 +442,7 @@ Tensor _remainder(const Tensor& input_a, const Tensor& input_b, const MemoryConf Tensor result = ttnn::subtract(a, ttnn::multiply(b, floor_div(input_a, input_b, output_mem_config), std::nullopt, output_mem_config), std::nullopt, output_mem_config); result = ttnn::where(ttnn::ge(result, b), ttnn::subtract(result, b), result); result = ttnn::where(ttnn::ltz(b), ttnn::add(result, b), result); - result = ttnn::where(ttnn::eq(a, b, std::nullopt, output_mem_config), full_like(input_a, 0.0f, output_mem_config), result, output_mem_config); + result = ttnn::where(ttnn::eq(a, b, std::nullopt, output_mem_config), ttnn::full_like(input_a, 0.0f), result, output_mem_config); return ttnn::typecast(result, input_dtype); } Tensor remainder(const Tensor& input_a, const Tensor& input_b, const MemoryConfig& output_mem_config) { @@ -454,7 +454,7 @@ Tensor _fmod(const Tensor& input_a, const Tensor& input_b, const MemoryConfig& o Tensor a = ttnn::typecast(input_a, DataType::FLOAT32); Tensor b = ttnn::typecast(input_b, DataType::FLOAT32); Tensor result = ttnn::subtract(a, ttnn::multiply(div(input_a, input_b, true, "trunc", output_mem_config), b, std::nullopt, output_mem_config), std::nullopt, output_mem_config); - result = ttnn::where(ttnn::eq(a, b, std::nullopt, output_mem_config), full_like(input_a, 0.0f, output_mem_config), result, output_mem_config); + result = ttnn::where(ttnn::eq(a, b, std::nullopt, output_mem_config), ttnn::full_like(input_a, 0.0f), result, output_mem_config); return ttnn::typecast(result, input_dtype); } Tensor fmod(const Tensor& input_a, const Tensor& input_b, const MemoryConfig& output_mem_config) { @@ -463,8 +463,8 @@ Tensor fmod(const Tensor& input_a, const Tensor& input_b, const MemoryConfig& ou // logit(input, eps)=log(input / 1 - input) Tensor _logit(const Tensor& input_a, float eps, const MemoryConfig& output_mem_config) { - Tensor t_eps = full_like(input_a, eps, output_mem_config); - Tensor t1m_eps = full_like(input_a, (1 - eps), output_mem_config); + Tensor t_eps = ttnn::full_like(input_a, eps); + Tensor t1m_eps = ttnn::full_like(input_a, (1 - eps)); Tensor logit_input = ttnn::where( ttnn::ltz(t_eps, output_mem_config), input_a, @@ -558,7 +558,7 @@ Tensor _scatter(const Tensor& input_a, const Tensor& input_b, const MemoryConfig tt::tt_metal::Array4D start_index = {0, 0, 0, 0}; ttnn::Tensor input_tensor_4D = ttnn::unsqueeze_to_4D(input_a); - Tensor index = ttnn::pad(0, ones_like(input_tensor_4D, output_mem_config), input_b.get_legacy_shape().to_array_4D(), start_index, 0, false, std::nullopt); + Tensor index = ttnn::pad(0, ttnn::full_like(input_tensor_4D, 1.0f), input_b.get_legacy_shape().to_array_4D(), start_index, 0, false, std::nullopt); Tensor temp_a = ttnn::pad(0, input_tensor_4D,input_b.get_legacy_shape().to_array_4D(), start_index, 0, false, std::nullopt); return ttnn::where(index, temp_a, input_b, output_mem_config); } @@ -566,42 +566,6 @@ Tensor scatter(const Tensor& input_a, const Tensor& input_b, const MemoryConfig& return operation::decorate_as_composite(__func__, _scatter)(input_a, input_b, output_mem_config); } -// on-device tensor creation 0s like @reference_tensor -Tensor zeros_like( - uint8_t queue_id, - const Tensor& reference_tensor, - const MemoryConfig& output_mem_config, - std::optional output_tensor) { - return mk_zero_tensor_like(reference_tensor, output_mem_config, output_tensor); -} -Tensor zeros_like( - const Tensor& reference_tensor, const MemoryConfig& output_mem_config, std::optional output_tensor) { - uint8_t default_queue_id = 0; - return mk_zero_tensor_like(default_queue_id, reference_tensor, output_mem_config, output_tensor); -} - -// on-device tensor creation 1s like @reference_tensor -Tensor ones_like(const Tensor& reference_tensor, const MemoryConfig& output_mem_config) { - return mk_filled_tensor_like(reference_tensor, 1.0f, output_mem_config); -} - -// on-device tensor creation with value like @reference_tensor -Tensor full_like( - const Tensor& reference_tensor, - float value, - const MemoryConfig& output_mem_config, - std::optional output_tensor) { - uint8_t default_queue_id = 0; - return mk_filled_tensor_like(reference_tensor, value, output_mem_config, output_tensor, default_queue_id); -} -Tensor full_like( - uint8_t queue_id, - const Tensor& reference_tensor, - float value, - const MemoryConfig& output_mem_config, - std::optional output_tensor) { - return mk_filled_tensor_like(reference_tensor, value, output_mem_config, output_tensor, queue_id); -} // hardtanh Tensor _hardtanh( @@ -613,38 +577,6 @@ Tensor hardtanh( return operation::decorate_as_composite(__func__, _hardtanh)(a, low, high, output_mem_config); } -// on-device tensor creation 0s with shape -Tensor zeros( - const Shape shape, DataType data_type, Layout layout, Device* device, const MemoryConfig& output_mem_config) { - return tt::numpy::zeros(shape, data_type, layout, device, output_mem_config); -} - -Tensor empty( - const Shape shape, DataType data_type, Layout layout, Device* device, const MemoryConfig& output_mem_config) { - return create_device_tensor(shape, data_type, layout, device, output_mem_config); -} - -// on-device tensor creation 1s with shape -Tensor ones( - const Shape shape, DataType data_type, Layout layout, Device* device, const MemoryConfig& output_mem_config) { - return tt::numpy::ones(shape, data_type, layout, device, output_mem_config); -} - -// on-device tensor creation with shape and filled with value -Tensor full( - const Shape shape, - float value, - DataType data_type, - Layout layout, - Device* device, - const MemoryConfig& output_mem_config) { - return tt::numpy::full(shape, value, data_type, layout, device, output_mem_config); -} - -// on-device with increment -Tensor arange(int32_t start, int32_t end, int32_t step, Device* device, const MemoryConfig& output_mem_config) { - return tt::numpy::arange(start, end, step, Layout::ROW_MAJOR, device, output_mem_config); -} /** * outer product = matrix multiply when a = [1,1,N,1] and b = [1,1,1,M] @@ -723,27 +655,6 @@ std::vector split_tensor_for_glu(const Tensor& input_a, int32_t dim, con return t_split; } - -// on-device tensor creation with shape and filled with value -Tensor _sfpu_eps(const Shape shape, Layout layout, Device* device, const MemoryConfig& output_mem_config) { - float value = device->sfpu_eps(); - return tt::numpy::full(shape, value, DataType::BFLOAT16, layout, device, output_mem_config); -} -Tensor sfpu_eps(const Shape shape, Layout layout, Device* device, const MemoryConfig& output_mem_config) { - return operation::decorate_as_composite(__func__, _sfpu_eps)(shape, layout, device, output_mem_config); -} - -Tensor create_mask(const Tensor& input_a, const MemoryConfig& output_mem_config) { - auto& padded_shape = input_a.get_legacy_shape(); - auto& unpadded_shape = padded_shape.without_padding(); - if (padded_shape == unpadded_shape) - return input_a; - float t_inf = -std::numeric_limits::infinity(); - Tensor masked_input = tt::numpy::mask_padded_input(padded_shape, unpadded_shape, DataType::BFLOAT16); - masked_input = ttnn::where(masked_input, input_a, t_inf, output_mem_config); - return masked_input; -} - } // namespace tt_metal } // namespace tt diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.hpp b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.hpp index c0a342e4fceb..0be7311dd8f6 100644 --- a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.hpp +++ b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.hpp @@ -168,84 +168,9 @@ Tensor scatter( const Tensor& input_b, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG); -// on-device tensor creation 0s like @reference_tensor -Tensor zeros_like( - uint8_t queue_id, - const Tensor& reference_tensor, - const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - std::optional output_tensor= std::nullopt); -Tensor zeros_like( - const Tensor& reference_tensor, - const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - std::optional output_tensor= std::nullopt); - -// on-device tensor creation 1s like @reference_tensor -Tensor ones_like( - const Tensor& reference_tensor, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG); - -// on-device tensor creation with value like @reference_tensor -Tensor full_like( - uint8_t queue_id, - const Tensor& reference_tensor, - float value, - const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - std::optional output_tensor= std::nullopt); -Tensor full_like( - const Tensor& reference_tensor, - float value, - const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - std::optional output_tensor= std::nullopt); - -// on-device tensor creation 0s with shape -Tensor empty( - const Shape shape, - DataType data_type = DataType::BFLOAT16, - Layout layout = Layout::ROW_MAJOR, - Device* device = nullptr, - const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG); - -// on-device tensor creation 0s with shape -Tensor zeros( - const Shape shape, - DataType data_type = DataType::BFLOAT16, - Layout layout = Layout::ROW_MAJOR, - Device* device = nullptr, - const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG); - -// on-device tensor creation 1s with shape -Tensor ones( - const Shape shape, - DataType data_type = DataType::BFLOAT16, - Layout layout = Layout::ROW_MAJOR, - Device* device = nullptr, - const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG); - -Tensor arange( - int32_t start, - int32_t end, - int32_t step = 1, - Device* device = nullptr, - const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG); - -// on-device tensor creation with shape and filled with value -Tensor full( - const Shape shape, - float value, - DataType data_type = DataType::BFLOAT16, - Layout layout = Layout::ROW_MAJOR, - Device* device = nullptr, - const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG); - // rpow: y = k**(a) Tensor rpow(const Tensor& a, float k, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG); -// machine epsilon -Tensor eps( - const Shape shape, - Layout layout, - Device* device, - const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG); - // logit(input, eps)=log(input / 1 - input) Tensor logit( const Tensor& input_a, float eps, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG); @@ -273,9 +198,6 @@ Tensor logical_ori( float immediate, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG); -// on-device tensor creation with shape and filled with value -Tensor sfpu_eps(const Shape shape, Layout layout, Device* device, const MemoryConfig& output_mem_config); - } // namespace tt_metal } // namespace tt diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/optimizer/optimizer_ops.cpp b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/optimizer/optimizer_ops.cpp index f78e81a1fddf..a515e137ae94 100644 --- a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/optimizer/optimizer_ops.cpp +++ b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/optimizer/optimizer_ops.cpp @@ -3,7 +3,6 @@ // SPDX-License-Identifier: Apache-2.0 #include "ttnn/deprecated/tt_dnn/op_library/optimizer/optimizer_ops.hpp" -#include "ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.hpp" #include "ttnn/deprecated/tt_dnn/op_library/reshape/reshape_op.hpp" #include "ttnn/deprecated/tt_dnn/op_library/reduce/reduce_op.hpp" @@ -13,6 +12,7 @@ #include "ttnn/operations/eltwise/unary/unary.hpp" #include "ttnn/cpp/ttnn/operations/eltwise/ternary/where.hpp" #include "ttnn/operations/eltwise/unary/unary_composite.hpp" +#include "ttnn/operations/creation.hpp" namespace tt { @@ -44,7 +44,7 @@ std::vector _lamb_optimizer(const Tensor& data, const Tensor& grad, cons auto rmsnorm = [&output_mem_config](Tensor data) -> Tensor { Tensor data_val = ttnn::square(data, output_mem_config); data_val = global_sum(data_val,output_mem_config); - Tensor zeros = zeros_like(data, output_mem_config); + Tensor zeros = ttnn::full_like(data, 0.0f); data_val = ttnn::sqrt(ttnn::add(zeros, data_val, std::nullopt, output_mem_config), output_mem_config); return data_val; }; @@ -52,7 +52,7 @@ std::vector _lamb_optimizer(const Tensor& data, const Tensor& grad, cons Tensor weight_norm = ttnn::clamp(data_val, 0.0f, 10.0f, output_mem_config); Tensor adam_norm = rmsnorm(adam_step); - Tensor ones = ones_like(weight_norm, output_mem_config); + Tensor ones = ttnn::full_like(weight_norm, 1.0f); Tensor trust_ratio_mid = ttnn::multiply(weight_norm, ttnn::reciprocal(ttnn::add(adam_norm, eps, std::nullopt, output_mem_config),output_mem_config), std::nullopt, output_mem_config); Tensor trust_ratio = ttnn::where(ttnn::gtz(weight_norm, output_mem_config), ttnn::where(ttnn::gtz(adam_norm, output_mem_config), trust_ratio_mid, ones, output_mem_config), ones); diff --git a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/optimizer/optimizer_ops.hpp b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/optimizer/optimizer_ops.hpp index dd13c8b53405..80dba46a5538 100644 --- a/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/optimizer/optimizer_ops.hpp +++ b/ttnn/cpp/ttnn/deprecated/tt_dnn/op_library/optimizer/optimizer_ops.hpp @@ -3,7 +3,6 @@ // SPDX-License-Identifier: Apache-2.0 #pragma once -#include "ttnn/deprecated/tt_dnn/op_library/composite/composite_ops.hpp" namespace tt { namespace tt_metal { diff --git a/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp b/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp index ed1740d4b515..0392ca31126f 100644 --- a/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp +++ b/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor_composite_ops.cpp @@ -15,27 +15,6 @@ namespace tt::tt_metal::detail { void TensorModuleCompositeOPs(py::module& m_tensor) { - m_tensor.def( - "sfpu_eps", - &tt::tt_metal::sfpu_eps, - py::arg("shape"), - py::arg("layout").noconvert() = Layout::ROW_MAJOR, - py::arg("device") = nullptr, - py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - R"doc( - Returns a new tensor filled with the machine epsilon value in shape specified by input ``shape``. - - Input shape is specified as a list of 4 integer elements - - Output tensor will have BFLOAT16 data type. - - .. csv-table:: - :header: "Argument", "Description", "Data type", "Valid range", "Required" - "shape", "Shape vector", "Vector", "[W, Z, Y, X]", "Yes" - "layout", "Tensor layout", "Layout", "default is ROW_MAJOR", "No" - "device", "Device tensor is placed on", "Device", "default is None (on host)", "No" - "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" - )doc"); m_tensor.def( "outer", @@ -187,210 +166,6 @@ void TensorModuleCompositeOPs(py::module& m_tensor) { "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" )doc"); - m_tensor.def( - "full_like", - [](const Tensor& reference_tensor, - float value, - const MemoryConfig& output_mem_config, - std::optional output_tensor, - uint8_t queue_id) { - return full_like(queue_id, reference_tensor, value, output_mem_config, output_tensor); - }, - py::arg("input").noconvert(), - py::arg("fill_value"), - py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - py::arg("output_tensor").noconvert() = std::nullopt, - py::arg("queue_id").noconvert() = 0, - R"doc( - Returns a new tensor filled with the scalar value shaped like reference tensor ``arg0``. - - Input tensor must have BFLOAT16 data type. - - Output tensor will have BFLOAT16 data type. - - .. csv-table:: - :header: "Argument", "Description", "Data type", "Valid range", "Required" - - "input", "Reference Tensor", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes" - "fill_value", "Fill value", "float", "", "Yes" - "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" - "output_tensor", "optional output tensor", "Tensor", "default is None", "No" - "queue_id", "Command queue id", "integer", "default to 0", "No" - )doc"); - - m_tensor.def( - "zeros_like", - [](const Tensor& reference_tensor, - const MemoryConfig& output_mem_config, - std::optional output_tensor, - uint8_t queue_id) { - return zeros_like(queue_id, reference_tensor, output_mem_config, output_tensor); - }, - py::arg("input").noconvert(), - py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - py::arg("output_tensor").noconvert() = std::nullopt, - py::arg("queue_id").noconvert() = 0, - R"doc( - Returns a new tensor filled with zeros shaped like reference tensor ``input``. - - Input tensor must have BFLOAT16 data type. - - Output tensor will have BFLOAT16 data type. - - .. csv-table:: - :header: "Argument", "Description", "Data type", "Valid range", "Required" - - "input", "Reference Tensor", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes" - "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" - "output_tensor", "optional output tensor", "Tensor", "default is None", "No" - "queue_id", "Command queue id", "integer", "default to 0", "No" - )doc"); - - m_tensor.def( - "ones_like", - &ones_like, - py::arg("input").noconvert(), - py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - R"doc( - Returns a new tensor filled with ones shaped like reference tensor ``arg0``. - - Input tensor must have BFLOAT16 data type. - - Output tensor will have BFLOAT16 data type. - - .. csv-table:: - :header: "Argument", "Description", "Data type", "Valid range", "Required" - - "input", "Reference Tensor", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes" - "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" - )doc"); - - m_tensor.def( - "zeros", - &zeros, - py::arg("shape"), - py::arg("data_type").noconvert() = DataType::BFLOAT16, - py::arg("layout").noconvert() = Layout::ROW_MAJOR, - py::arg("device") = nullptr, - py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - R"doc( - Returns a new tensor filled with zeros in shape specified by input ``shape``. - - Input shape is specified as a list of 4 integer elements - - Output tensor will have BFLOAT16 data type. - - .. csv-table:: - :header: "Argument", "Description", "Data type", "Valid range", "Required" - - "shape", "Shape vector", "Vector", "[W, Z, Y, X]", "Yes" - "data_type", "Tensor data type", "DataType", "default is BFLOAT16", "No" - "layout", "Tensor layout", "Layout", "default is ROW_MAJOR", "No" - "device", "Device tensor is placed on", "Device", "default is None (on host)", "No" - "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" - )doc"); - - m_tensor.def( - "empty", - &empty, - py::arg("shape"), - py::arg("data_type").noconvert() = DataType::BFLOAT16, - py::arg("layout").noconvert() = Layout::ROW_MAJOR, - py::arg("device") = nullptr, - py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - R"doc( - Returns a new empty tensor (on device) in shape specified by input ``shape``. - - Input shape is specified as a list of 4 integer elements - - Output tensor will have BFLOAT16 data type. - - .. csv-table:: - :header: "Argument", "Description", "Data type", "Valid range", "Required" - - "shape", "Shape vector", "Vector", "[W, Z, Y, X]", "Yes" - "data_type", "Tensor data type", "DataType", "default is BFLOAT16", "No" - "layout", "Tensor layout", "Layout", "default is ROW_MAJOR", "No" - "device", "Device tensor is placed on", "Device", "default is None (on host)", "No" - "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" - )doc"); - - m_tensor.def( - "ones", - &ones, - py::arg("shape"), - py::arg("data_type").noconvert() = DataType::BFLOAT16, - py::arg("layout").noconvert() = Layout::ROW_MAJOR, - py::arg("device") = nullptr, - py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - R"doc( - Returns a new tensor filled with ones in shape specified by input ``shape``. - - Input shape is specified as a list of 4 integer elements - - Output tensor will have BFLOAT16 data type. - - .. csv-table:: - :header: "Argument", "Description", "Data type", "Valid range", "Required" - - "shape", "Shape vector", "Vector", "[W, Z, Y, X]", "Yes" - "data_type", "Tensor data type", "DataType", "default is BFLOAT16", "No" - "layout", "Tensor layout", "Layout", "default is ROW_MAJOR", "No" - "device", "Device tensor is placed on", "Device", "default is None (on host)", "No" - "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" - )doc"); - - m_tensor.def( - "full", - &full, - py::arg("shape"), - py::arg("fill_value"), - py::arg("data_type").noconvert() = DataType::BFLOAT16, - py::arg("layout").noconvert() = Layout::ROW_MAJOR, - py::arg("device") = nullptr, - py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - R"doc( - Returns a new tensor filled with the scalar value in shape specified by input ``shape``. - - Input shape is specified as a list of 4 integer elements - - Output tensor will have BFLOAT16 data type. - - .. csv-table:: - :header: "Argument", "Description", "Data type", "Valid range", "Required" - - "shape", "Shape vector", "Vector", "[W, Z, Y, X]", "Yes" - "fill_value", "Fill value ", "float", "", "Yes" - "data_type", "Tensor data type", "DataType", "default is BFLOAT16", "No" - "layout", "Tensor layout", "Layout", "default is ROW_MAJOR", "No" - "device", "Device tensor is placed on", "Device", "default is None (on host)", "No" - "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" - )doc"); - - m_tensor.def( - "arange", - &arange, - py::arg("start"), - py::arg("end"), - py::arg("step"), - py::arg("device") = nullptr, - py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - R"doc( - Returns a new 1D tensor with the incremented values in size specified by inputs ``start``, ``end`` and ``step``. - - Inpute scalars are integers specifying start, end, and step sizes. - Output tensor will have BFLOAT16 data type. - - .. csv-table:: - :header: "Argument", "Description", "Data type", "Valid range", "Required" - - "start", "Start", "int", "", "Yes" - "end", "End", "int", "> start", "Yes" - "step", "Step", "int", "> 0", "Yes" - "device", "Device tensor is placed on", "Device", "default is None (on host)", "No" - "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" - )doc"); - #if 0 m_tensor.def("bitwise_complement", &bitwise_complement, R"doc( diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary_backward/device/binary_backward_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary_backward/device/binary_backward_op.cpp index a5f636ad6096..52cd09b1e4ce 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary_backward/device/binary_backward_op.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary_backward/device/binary_backward_op.cpp @@ -271,26 +271,18 @@ std::vector> _eq_bw( std::vector> result; if (are_required_outputs.at(0)) { - if(input_grad.has_value()){ - tt::tt_metal::zeros_like(cq_id, input, output_mem_config, input_grad); - } else { - input_grad = tt::tt_metal::zeros_like(cq_id, input, output_mem_config); - } + input_grad = ttnn::full_like(input, 0.0f); result.emplace_back(input_grad); } else { result.emplace_back(std::nullopt); } if (are_required_outputs.at(1)) { - if(other_grad.has_value()){ - tt::tt_metal::zeros_like(cq_id, input, output_mem_config, other_grad); - } else { - other_grad = tt::tt_metal::zeros_like(cq_id, input, output_mem_config); - } + other_grad = ttnn::full_like(grad, 0.0f); result.emplace_back(other_grad); } else { result.emplace_back(std::nullopt); } - return std::move(result); + return result; } std::vector _eq_bw_inter( diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary_backward/device/unary_backward_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/unary_backward/device/unary_backward_op.cpp index ec22a0097191..2df7755ac593 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/unary_backward/device/unary_backward_op.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/unary_backward/device/unary_backward_op.cpp @@ -192,11 +192,7 @@ std::vector> _pow_bw(uint8_t queue_id, const Tensor& grad, const float ZERO_THRESHOLD = std::numeric_limits::epsilon() * 10.0f; TT_FATAL(exponent >= 0.0, "negative exponents are not supported; use recip(pow(input,abs(exponent)))"); if (std::abs(exponent) < ZERO_THRESHOLD) { - if(input_grad.has_value()){ - tt::tt_metal::zeros_like(queue_id, input, output_mem_config, input_grad); - } else { - input_grad = tt::tt_metal::zeros_like(queue_id, input, output_mem_config); - } + input_grad = ttnn::operations::creation::zeros_like(input); grad_tensor.emplace_back(input_grad); return grad_tensor; } @@ -496,8 +492,8 @@ std::vector _fill_bw(const Tensor& grad, const Tensor& input, const std: std::vector grad_tensor; auto output_memory_config = output_mem_config.value_or(input.memory_config()); Tensor val = grad; - val = global_sum(val, output_memory_config); - Tensor result = tt::tt_metal::zeros_like(grad, output_memory_config); + val = global_sum(val); + Tensor result = ttnn::operations::creation::zeros_like(grad); result = ttnn::add(result, val, std::nullopt, output_mem_config); grad_tensor.emplace_back(result); return grad_tensor; @@ -1422,6 +1418,7 @@ std::vector _repeat_bw( auto shape_wh = input.get_legacy_shape(); TT_FATAL(shape_wh[0] == 1 && "input shape[0] should be 1"); + auto ttnn_device = input.device(); // input.get_legacy_shape()[0] // If repeat shape has 0's, it returns zeros of given input if (shape[0] == 0 || shape[1] == 0 || shape[2] == 0 || shape[3] == 0) { @@ -1431,24 +1428,26 @@ std::vector _repeat_bw( } else if (shape[0] > 1) { std::vector dim = {0}; TT_FATAL(shape[1] == 1 && shape[2] == 1 && shape[3] == 1 && "repeat[1], [2], [3] should be 1"); - const tt::tt_metal::Shape required = {1, shape_wh[1], shape_wh[2], shape_wh[3]}; + std::array intended_shape_array = {1, shape_wh[1], shape_wh[2], shape_wh[3]}; + const ttnn::Shape required = ttnn::Shape(intended_shape_array); Tensor result = tt::operations::primary::moreh_sum( grad, dim, true, - tt::tt_metal::zeros(required, input.get_dtype(), input.get_layout(), input.device(), output_memory_config), + ttnn::operations::creation::zeros(required, input.get_dtype(), input.get_layout(), std::optional>(*ttnn_device), output_memory_config), output_memory_config); grad_tensor.emplace_back(result); return grad_tensor; } else if (shape[1] > 1) { std::vector dim = {1}; TT_FATAL(shape[0] == 1 && shape[2] == 1 && shape[3] == 1 && "repeat[0], [2], [3] should be 1"); - const tt::tt_metal::Shape required = {shape_wh[0], 1, shape_wh[2], shape_wh[3]}; + std::array intended_shape_array = {shape_wh[0], 1, shape_wh[2], shape_wh[3]}; + const ttnn::Shape required = ttnn::Shape(intended_shape_array); Tensor result = tt::operations::primary::moreh_sum( grad, dim, true, - tt::tt_metal::zeros(required, input.get_dtype(), input.get_layout(), input.device(), output_memory_config), + ttnn::operations::creation::zeros(required, input.get_dtype(), input.get_layout(), std::optional>(*ttnn_device), output_memory_config), output_memory_config); grad_tensor.emplace_back(result); return grad_tensor; diff --git a/ttnn/ttnn/operations/creation.py b/ttnn/ttnn/operations/creation.py index e58dbf9e07a8..174b40efaa64 100644 --- a/ttnn/ttnn/operations/creation.py +++ b/ttnn/ttnn/operations/creation.py @@ -86,32 +86,6 @@ def _golden_function(start: int, end: int, step: int, **_): return torch.arange(start, end, step) -@ttnn.register_python_operation( - name="ttnn.arange", - golden_function=_golden_function, -) -def arange( - start: int, - end: int, - step: int, - device, - memory_config: ttnn.MemoryConfig = ttnn.DRAM_MEMORY_CONFIG, -) -> ttnn.Tensor: - r""" - - arange(start: int, end: int, step: int, device, memory_config: ttnn.MemoryConfig = ttnn.DRAM_MEMORY_CONFIG) -> ttnn.Tensor - - Returns a new 1D tensor with the incremented values in size specified by inputs start, end and step. - - Args: - * :attr:`start` - * :attr:`end` - * :attr:`step` - """ - - output_tensor = ttnn.experimental.tensor.arange(start, end, step, device, output_mem_config=memory_config) - - return output_tensor - +ttnn.attach_golden_function(ttnn.arange, golden_function=_golden_function) __all__ = []