#10644: remove tensor creation ops
umadevimcw authored and KalaivaniMCW committed Jul 31, 2024
1 parent 96c3029 commit 6ad7dbb
Showing 36 changed files with 190 additions and 542 deletions.
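At each call site the change is mechanical: tensor creation ops move from the tt_lib.tensor namespace to ttnn, and, where a memory config is passed, the keyword changes from output_mem_config to memory_config (that second part is sketched again after the first Mistral hunk below). A minimal before/after sketch with a placeholder shape and fill value, not taken from any one file in this diff:

import ttnn

shape = [1, 1, 32, 32]

# Before this commit:
#   const = tt_lib.tensor.full(shape, 0.5)
# After, the same op is reached through ttnn with the same positional arguments:
const = ttnn.full(shape, 0.5)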
15 changes: 0 additions & 15 deletions docs/source/ttnn/ttnn/dependencies/tt_lib.rst
@@ -330,23 +330,8 @@ but in general retaining the data.
Tensor creation operations
==========================

.. autofunction:: tt_lib.tensor.arange

.. autofunction:: tt_lib.tensor.full

.. autofunction:: tt_lib.tensor.ones

.. autofunction:: tt_lib.tensor.ones_like

.. autofunction:: tt_lib.tensor.zeros

.. autofunction:: tt_lib.tensor.zeros_like

.. autofunction:: tt_lib.tensor.full_like

.. autofunction:: tt_lib.tensor.split_last_dim_two_chunks_tiled

.. autofunction:: tt_lib.tensor.empty

Broadcast and Reduce
====================
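For reference, the ops dropped from the tt_lib docs above match the entries removed from the sweep-test list at the bottom of this diff, and each is assumed to have a like-named ttnn counterpart (only arange, full, ones, and empty are exercised directly in the changed files; the zeros and *_like variants are listed on the assumption of matching names and signatures):

import ttnn

ttnn.arange      # was tt_lib.tensor.arange
ttnn.full        # was tt_lib.tensor.full
ttnn.full_like   # was tt_lib.tensor.full_like
ttnn.ones        # was tt_lib.tensor.ones
ttnn.ones_like   # was tt_lib.tensor.ones_like
ttnn.zeros       # was tt_lib.tensor.zeros
ttnn.zeros_like  # was tt_lib.tensor.zeros_like
ttnn.empty       # was tt_lib.tensor.empty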
2 changes: 1 addition & 1 deletion models/demos/metal_BERT_large_11/tt/mha.py
@@ -138,7 +138,7 @@ def op6_unmake_attention_heads(x):
def mha_(activation, attention_mask):
# TODO: Remove hardcoded shape hack
if reserve_split_heads_shape is not None:
temp = tt_lib.tensor.empty(
temp = ttnn.empty(
reserve_split_heads_shape,
tt_lib.tensor.DataType.BFLOAT16,
tt_lib.tensor.Layout.ROW_MAJOR,
@@ -76,7 +76,7 @@ def forward(

# during inference, return the average of both classifier predictions
logits = ttnn.add(cls_logits, distillation_logits)
half = tt_lib.tensor.full(logits.get_legacy_shape(), 0.5)
half = ttnn.full(logits.get_legacy_shape(), 0.5)
logits = ttnn.mul(logits, half)

# if not return_dict:
2 changes: 1 addition & 1 deletion models/experimental/deit/tt/deit_self_attention.py
@@ -67,7 +67,7 @@ def forward(

attention_scores = ttnn.matmul(query_layer, key_layer_transposed)

attention_head_size_tt = tt_lib.tensor.full(attention_scores.get_legacy_shape(), self.attention_head_size)
attention_head_size_tt = ttnn.full(attention_scores.get_legacy_shape(), self.attention_head_size)
attention_head_size_tt = ttnn.sqrt(attention_head_size_tt)
attention_head_size_tt = ttnn.reciprocal(attention_head_size_tt)

3 changes: 2 additions & 1 deletion models/experimental/distilbert/tt/distilbert_model.py
@@ -11,6 +11,7 @@
)

import tt_lib
import ttnn
from dataclasses import dataclass

from models.experimental.distilbert.tt.distilbert_embedding import TtDistilBert_Embeddings
@@ -105,7 +106,7 @@ def forward(

if attention_mask is not None:
input_shape[0:0] = [1, 1]
attention_mask = tt_lib.tensor.ones(input_shape)
attention_mask = ttnn.ones(input_shape)

head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)
"""
@@ -65,7 +65,7 @@ def __init__(self, config, state_dict=None, base_address="", device=None):
self.attention_head_size = self.dim // self.n_heads

def const_tensor(self, shape: List[int], value: int) -> tt_lib.tensor.Tensor:
return tt_lib.tensor.full(shape, value)
return ttnn.full(shape, value)

def get_min(self, tensor: tt_lib.tensor.Tensor):
tensor = tt_to_torch_tensor(tensor)
@@ -95,18 +95,18 @@ def attention(config, x, bcast_freq_xq, bcast_freq_xk, positions, mask, seqlen,
scatter_pos = scatter_pos.to(torch.int64)
scatter_pos = scatter_pos.repeat(bsz, 1, config.n_kv_heads, config.head_dim)

cache_k = tt_lib.tensor.empty(
cache_k = ttnn.empty(
[config.max_batch_size, config.sliding_window, config.n_kv_heads, config.head_dim],
layout=tt_lib.tensor.Layout.ROW_MAJOR,
device=device,
output_mem_config=config.out_mem_config,
memory_config=config.out_mem_config,
)
cache_k = tt_to_torch_tensor(cache_k).to(torch.float32)
cache_v = tt_lib.tensor.empty(
cache_v = ttnn.empty(
[config.max_batch_size, config.sliding_window, config.n_kv_heads, config.head_dim],
layout=tt_lib.tensor.Layout.ROW_MAJOR,
device=device,
output_mem_config=config.out_mem_config,
memory_config=config.out_mem_config,
)
cache_v = tt_to_torch_tensor(cache_v).to(torch.float32)
cache_k[:bsz].scatter_(dim=1, index=scatter_pos, src=xk[:, -config.sliding_window :])
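Besides the namespace move, creation ops that take a memory config also rename the keyword: output_mem_config becomes memory_config, as in the KV-cache allocation above. A condensed sketch of that pattern; allocate_kv_cache is a hypothetical helper, and config/device stand in for the objects the Mistral code already has:

import tt_lib
import ttnn

def allocate_kv_cache(config, device):
    # Previously: tt_lib.tensor.empty(..., output_mem_config=config.out_mem_config)
    return ttnn.empty(
        [config.max_batch_size, config.sliding_window, config.n_kv_heads, config.head_dim],
        layout=tt_lib.tensor.Layout.ROW_MAJOR,
        device=device,
        memory_config=config.out_mem_config,
    )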
4 changes: 2 additions & 2 deletions models/experimental/mistral/mistral_helper_funcs.py
@@ -113,7 +113,7 @@ def get_freqs_cis(freqs_cis: torch.Tensor, query_shape, key_shape, device=None,
BCH = tt_lib.tensor.BcastOpDim.HW
BCMUL = tt_lib.tensor.BcastOpMath.MUL

t_one_xq = tt_lib.tensor.ones(query_shape, output_mem_config=mem_config)
t_one_xq = ttnn.ones(query_shape, memory_config=mem_config)
t_one_xq = ttnn.permute(t_one_xq, (3, 1, 2, 0), memory_config=mem_config)

freqs_real = ttnn.permute(freqs_cis.real, (3, 1, 2, 0), memory_config=mem_config)
@@ -130,7 +130,7 @@ def get_freqs_cis(freqs_cis: torch.Tensor, query_shape, key_shape, device=None,
bcast_freq_re_xq.deallocate()
bcast_freq_im_xq.deallocate()

t_one_xk = tt_lib.tensor.ones(key_shape, output_mem_config=mem_config)
t_one_xk = ttnn.ones(key_shape, memory_config=mem_config)
t_one_xk = ttnn.permute(t_one_xk, (3, 1, 2, 0), memory_config=mem_config)

bcast_freq_re_xk = tt_lib.tensor.bcast(t_one_xk, freqs_real, BCMUL, BCH, output_mem_config=mem_config)
8 changes: 4 additions & 4 deletions models/experimental/mistral/tt/mistral_attention.py
@@ -87,18 +87,18 @@ def __init__(
)
self.cache_v = torch.empty(args.max_batch_size, args.sliding_window, self.n_kv_heads, self.args.head_dim)
else:
cache_k = tt_lib.tensor.empty(
cache_k = ttnn.empty(
[args.max_batch_size, args.sliding_window, self.n_kv_heads, self.args.head_dim],
layout=tt_lib.tensor.Layout.ROW_MAJOR,
device=self.device,
output_mem_config=self.args.out_mem_config,
memory_config=self.args.out_mem_config,
)
self.cache_k = tt_to_torch_tensor(cache_k).to(torch.float32)
cache_v = tt_lib.tensor.empty(
cache_v = ttnn.empty(
[args.max_batch_size, args.sliding_window, self.n_kv_heads, self.args.head_dim],
layout=tt_lib.tensor.Layout.ROW_MAJOR,
device=self.device,
output_mem_config=self.args.out_mem_config,
memory_config=self.args.out_mem_config,
)
self.cache_v = tt_to_torch_tensor(cache_v).to(torch.float32)

2 changes: 1 addition & 1 deletion models/experimental/mistral/tt/mistral_transformer.py
@@ -97,7 +97,7 @@ def forward(
mask: Optional[torch.Tensor] = None
if input_ids.get_legacy_shape()[-1] > 1:
seqlen = input_ids.get_legacy_shape()[-1]
tensor = tt_lib.tensor.full(
tensor = ttnn.full(
(1, 1, seqlen, seqlen),
fill_value=1.0,
)
4 changes: 2 additions & 2 deletions models/experimental/nanogpt/tt/nanogpt_attention.py
@@ -48,7 +48,7 @@ def __init__(self, config, base_address, device, tt_cache_path, dtype):
self.n_head = self.config.n_head
self.n_embd = self.config.n_embd

temp_bias = ttnn.tril(tt_lib.tensor.ones([1, 1, self.block_size, self.block_size]))
temp_bias = ttnn.tril(ttnn.ones([1, 1, self.block_size, self.block_size]))
temp_bias = tt_to_torch_tensor(temp_bias)
self.register_buffer(
"bias",
@@ -69,7 +69,7 @@ def __init__(self, config, base_address, device, tt_cache_path, dtype):
)

def const_tensor(self, shape, value):
return tt_lib.tensor.full(shape, value)
return ttnn.full(shape, value)

def forward(self, x: tt_lib.tensor.Tensor) -> tt_lib.tensor.Tensor:
(
2 changes: 1 addition & 1 deletion models/experimental/nanogpt/tt/nanogpt_model.py
@@ -67,7 +67,7 @@ def forward(self, idx: torch.Tensor) -> tt_lib.tensor.Tensor:
assert (
t <= self.config.block_size
), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
pos = tt_lib.tensor.arange(0, t, 1)
pos = ttnn.arange(0, t, 1)
pos = tt_to_torch_tensor(pos)
pos = pos.squeeze(0).squeeze(0)
pos = pos.to(dtype=torch.int64)
14 changes: 7 additions & 7 deletions models/experimental/roberta/tt/roberta_model.py
@@ -10,7 +10,7 @@
from typing import Optional, Tuple, Union, List

import tt_lib

import ttnn
from models.experimental.roberta.tt.roberta_encoder import TtRobertaEncoder
from models.experimental.roberta.tt.roberta_pooler import TtRobertaPooler
from models.experimental.roberta.tt.roberta_embeddings import PytorchEmbeddings
@@ -170,8 +170,8 @@ def get_extended_attention_mask(
# positions we want to attend and the dtype's smallest value for masked positions.
# Since we are adding it to the raw scores before the softmax, this is
# effectively the same as removing these entirely.
self.ones_const = tt_lib.tensor.full(extended_attention_mask.get_legacy_shape(), 1.0)
self.mul_const = tt_lib.tensor.full(extended_attention_mask.get_legacy_shape(), self.dtype_min_const)
self.ones_const = ttnn.full(extended_attention_mask.get_legacy_shape(), 1.0)
self.mul_const = ttnn.full(extended_attention_mask.get_legacy_shape(), self.dtype_min_const)
extended_attention_mask = ttnn.sub(self.ones_const, extended_attention_mask, memory_config=self.mem_config)

extended_attention_mask = ttnn.mul(extended_attention_mask, self.mul_const, memory_config=self.mem_config)
@@ -196,8 +196,8 @@ def invert_attention_mask(self, encoder_attention_mask: tt_lib.tensor.Tensor) ->

encoder_extended_attention_mask = torch2tt_tensor(torch_encoder_extended_attention_mask, self.device)

self.ones_const = tt_lib.tensor.full(encoder_extended_attention_mask.get_legacy_shape(), 1.0)
self.mul_const = tt_lib.tensor.full(encoder_extended_attention_mask.get_legacy_shape(), self.dtype_min_const)
self.ones_const = ttnn.full(encoder_extended_attention_mask.get_legacy_shape(), 1.0)
self.mul_const = ttnn.full(encoder_extended_attention_mask.get_legacy_shape(), self.dtype_min_const)

encoder_extended_attention_mask = ttnn.sub(
self.ones_const,
@@ -339,7 +339,7 @@ def forward(
past_key_values_length = past_key_values[0][0].get_legacy_shape()[2] if past_key_values is not None else 0

if attention_mask is None:
attention_mask = tt_lib.tensor.full((1, 1, batch_size, seq_length + past_key_values_length), 0.0)
attention_mask = ttnn.full((1, 1, batch_size, seq_length + past_key_values_length), 0.0)

if token_type_ids is None:
if hasattr(self.embeddings, "token_type_ids"):
@@ -364,7 +364,7 @@
) = encoder_hidden_states.get_legacy_shape()
encoder_hidden_shape = (1, 1, encoder_batch_size, encoder_sequence_length)
if encoder_attention_mask is None:
encoder_attention_mask = tt_lib.tensor.full(encoder_hidden_shape, 1.1)
encoder_attention_mask = ttnn.full(encoder_hidden_shape, 1.1)
encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask)
else:
encoder_extended_attention_mask = None
2 changes: 1 addition & 1 deletion models/experimental/roberta/tt/roberta_self_attention.py
@@ -184,7 +184,7 @@ def forward(
# back to tt
attention_scores = torch2tt_tensor(attention_scores, self.device)

div_const = tt_lib.tensor.full(
div_const = ttnn.full(
attention_scores.get_legacy_shape(),
1.0 / math.sqrt(self.attention_head_size),
)
3 changes: 2 additions & 1 deletion models/experimental/ssd/tt/ssd.py
@@ -5,6 +5,7 @@
import torch
from torch import nn
import tt_lib
import ttnn
import tt_lib.fallback_ops as fallback_ops
from typing import List, Optional, Tuple, Dict, OrderedDict

@@ -102,7 +103,7 @@ def __init__(

def get_in_channels(self, backbone: TtSSDLiteFeatureExtractorMobileNet):
size = (320, 320)
temporary_image = tt_lib.tensor.ones([1, 3, size[1], size[0]], device=self.device)
temporary_image = ttnn.ones([1, 3, size[1], size[0]], device=self.device)
backbone.eval()
features = backbone(temporary_image)
out_channels = [tensor.get_legacy_shape()[1] for i, tensor in features.items()]
2 changes: 1 addition & 1 deletion models/experimental/stable_diffusion/tt/cross_attention.py
@@ -186,7 +186,7 @@ def get_attention_scores(
# self.scale,
# self.scale)

scale_tensor = ttl.tensor.full(temp.get_legacy_shape(), self.scale)
scale_tensor = ttnn.full(temp.get_legacy_shape(), self.scale)
attention_scores = ttnn.mul(scale_tensor, temp)

if attention_mask is not None:
2 changes: 1 addition & 1 deletion models/experimental/stable_diffusion/tt/residual_block.py
@@ -205,7 +205,7 @@ def forward(self, input_tensor: ttl.tensor.Tensor, temb: ttl.tensor.Tensor) -> t

# create a tensor of size output_scale_factor
output_sc_recip = 1 / self.output_scale_factor
output_sc_recip = ttl.tensor.full(input_tensor.get_legacy_shape(), output_sc_recip)
output_sc_recip = ttnn.full(input_tensor.get_legacy_shape(), output_sc_recip)
output_tensor = ttnn.add(input_tensor, hidden_states)
output_tensor = ttnn.mul(output_tensor, output_sc_recip)

2 changes: 1 addition & 1 deletion models/experimental/swin/tt/swin_embeddings.py
@@ -44,7 +44,7 @@ def __init__(self, config, state_dict, base_address, device, use_mask_token=Fals
self.norm = fallback_ops.LayerNorm(gamma, beta, normalized_shape=config.embed_dim, eps=config.layer_norm_eps)

def const_tensor(self, shape, value):
return tt_lib.tensor.full(shape, value)
return ttnn.full(shape, value)

def forward(
self,
2 changes: 1 addition & 1 deletion models/experimental/swin/tt/swin_self_attention.py
@@ -70,7 +70,7 @@ def __init__(
self.value_bias = torch_to_tt_tensor_rm(state_dict[f"{base_address}.value.bias"], self.device)

def const_tensor(self, shape, value):
return tt_lib.tensor.full(shape, value)
return ttnn.full(shape, value)

def transpose_for_scores(self, x: tt_lib.tensor.Tensor) -> tt_lib.tensor.Tensor:
# x must be 4d originaly
3 changes: 2 additions & 1 deletion models/experimental/whisper/tt/whisper_attention.py
@@ -5,6 +5,7 @@
import torch
import torch.nn as nn
import tt_lib
import ttnn
from typing import Optional, Tuple, Union

from models.utility_functions import torch2tt_tensor, tt2torch_tensor
@@ -114,7 +115,7 @@ def forward(
if q_proj_shape == self.cached_q_proj_shape:
q_proj_mul_const = self.q_proj_mul_const
else:
self.q_proj_mul_const = tt_lib.tensor.full(q_proj_shape, self.scaling)
self.q_proj_mul_const = ttnn.full(q_proj_shape, self.scaling)
self.cached_q_proj_shape = q_proj_shape
q_proj_mul_const = self.q_proj_mul_const

32 changes: 9 additions & 23 deletions models/experimental/whisper/tt/whisper_for_audio_classification.py
@@ -3,6 +3,7 @@
# SPDX-License-Identifier: Apache-2.0

import tt_lib
import ttnn
import torch
import torch.nn as nn
from dataclasses import dataclass
@@ -14,6 +15,7 @@

from models.experimental.whisper.tt.whisper_encoder import TtWhisperEncoder


@dataclass
class TtWhisperForAudioClassificationOutput:
loss: Optional[tt_lib.tensor.Tensor] = None
@@ -37,16 +39,12 @@ def __init__(self, state_dict, device, config):
config=config,
)

num_layers = (
config.num_hidden_layers + 1
) # transformer layers + input embeddings
num_layers = config.num_hidden_layers + 1 # transformer layers + input embeddings
if config.use_weighted_layer_sum:
# Not using this parameter for now
N, C, H, W = 1, 1, 1, num_layers
weight_init_const = 1.0 / num_layers
self.layer_weights = tt_lib.tensor.full(
(1, 1, 1, num_layers), weight_init_const
)
self.layer_weights = ttnn.full((1, 1, 1, num_layers), weight_init_const)

self.projector_weight = torch2tt_tensor(
state_dict[f"projector.weight"], self.device, tt_lib.tensor.Layout.ROW_MAJOR
@@ -122,19 +120,11 @@ def forward(
'af_za'
```"""

output_attentions = (
output_attentions
if output_attentions is not None
else self.config.output_attentions
)
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states
if output_hidden_states is not None
else self.config.output_hidden_states
)
return_dict = (
return_dict if return_dict is not None else self.config.use_return_dict
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

if encoder_outputs is None:
encoder_outputs = self.encoder(
@@ -160,17 +150,13 @@
hidden_states = encoder_outputs.last_hidden_state

# Apply Linear layer
hidden_states = linear(
hidden_states, self.projector_weight, self.projector_bias
)
hidden_states = linear(hidden_states, self.projector_weight, self.projector_bias)

# Torch mean
torch_hidden_states = tt2torch_tensor(hidden_states)
torch_pooled_output = torch_hidden_states.mean(dim=-2)
# If something changes these dimension -2 should always work
pooled_output = torch2tt_tensor(
torch_pooled_output, self.device, tt_lib.tensor.Layout.ROW_MAJOR
)
pooled_output = torch2tt_tensor(torch_pooled_output, self.device, tt_lib.tensor.Layout.ROW_MAJOR)

# Apply classifier layer
logits = linear(pooled_output, self.classifier_weight, self.classifier_bias)
@@ -74,14 +74,6 @@ def custom_compare(*args, **kwargs):
"hypot",
"hardswish",
"hardsigmoid",
"ones_like",
"zeros_like",
"full_like",
"ones",
"empty",
"zeros",
"full",
"arange",
"hardshrink",
"softshrink",
"sinh",