Merge branch 'main' into abhullar/diff-aligns

tenstorrent · Jan 12, 2025 · a588309 · a588309
2 parents ec1c03e + 35c7145
commit a588309
Show file tree

Hide file tree

Showing 45 changed files with 2,558 additions and 353 deletions.
diff --git a/.github/workflows/ttnn-run-sweeps.yaml b/.github/workflows/ttnn-run-sweeps.yaml
@@ -158,6 +158,10 @@ on:
           - eltwise.unary.sinh.sinh
           - eltwise.unary.sinh.sinh_sharded
           - eltwise.unary.asinh.asinh
+          - eltwise.unary.acosh.acosh
+          - eltwise.unary.acosh.acosh_sharded
+          - eltwise.unary.acos.acos
+          - eltwise.unary.acos.acos_sharded
           - eltwise.unary.cosh.cosh
           - eltwise.unary.relu_min.relu_min
           - eltwise.unary.relu_min.relu_min_sharded

diff --git a/CODEOWNERS b/CODEOWNERS
@@ -98,12 +98,12 @@ ttnn/cpp/ttnn/deprecated/tt_lib/csrc/ @ayerofieiev-tt @razorback3 @dongjin-na
 
 ttnn/cpp/ttnn/operations/moreh*/ @razorback3 @dongjin-na @cfjchu @ayerofieiev-tt @dmakoviichuk-tt
 ttnn/cpp/ttnn/operations/ccl/ @SeanNijjar @cfjchu @jvegaTT @tt-aho
-ttnn/cpp/ttnn/operations/pool/ @mywoodstock @shwetankTT @sankarmanoj-tt @pavlejosipovic
-ttnn/cpp/ttnn/operations/conv/ @mywoodstock @shwetankTT @sankarmanoj-tt @pavlejosipovic
-ttnn/cpp/ttnn/operations/sliding_window/ @mywoodstock @sankarmanoj-tt @pavlejosipovic
+ttnn/cpp/ttnn/operations/pool/ @tenstorrent/metalium-developers-convolutions
+ttnn/cpp/ttnn/operations/conv/ @tenstorrent/metalium-developers-convolutions
+ttnn/cpp/ttnn/operations/sliding_window/ @tenstorrent/metalium-developers-convolutions
 ttnn/cpp/ttnn/operations/data_movement/ @ntarafdar @sjameelTT @jaykru-tt @yugi957 @jvegaTT @llongTT @nardoTT
-ttnn/cpp/ttnn/operations/data_movement/fold/ @mywoodstock @shwetankTT @sankarmanoj-tt @pavlejosipovic
-ttnn/cpp/ttnn/operations/data_movement/untilize_with_halo_v2/ @mywoodstock @shwetankTT @sankarmanoj-tt @pavlejosipovic
+ttnn/cpp/ttnn/operations/data_movement/fold/ @tenstorrent/metalium-developers-convolutions
+ttnn/cpp/ttnn/operations/data_movement/untilize_with_halo_v2/ @tenstorrent/metalium-developers-convolutions
 ttnn/cpp/ttnn/operations/matmul/ @TT-BrianLiu @bbradelTT @yugaoTT @asandhupatlaTT
 ttnn/cpp/ttnn/operations/experimental/ccl/ @SeanNijjar @jvegaTT @tt-aho
 ttnn/cpp/ttnn/operations/experimental/matmul/ @TT-BrianLiu @bbradelTT @yugaoTT @asandhupatlaTT
@@ -120,7 +120,7 @@ tests/ttnn/unit_tests/operations/eltwise/ @patrickroberts @yan-zaretskiy @eyonla
 tests/sweep_framework/ @xanderchin @jdesousa-TT @sjameelTT
 tests/sweep_framework/sweeps
 tests/sweep_framework/sweeps/eltwise/ @patrickroberts @yan-zaretskiy @eyonland
-tests/sweep_framework/sweeps/conv2d/  @nkpatel-tt @mywoodstock @shwetankTT @sankarmanoj-tt @pavlejosipovic
+tests/sweep_framework/sweeps/conv2d/  @tenstorrent/metalium-developers-convolutions
 tests/sweep_framework/sweeps/data_movement/  @sjameelTT @ntarafdar @jaykru-tt @yugi957 @llongTT @jvegaTT @nardoTT
 tests/sweep_framework/sweeps/fused/  @bbradelTT @asandhupatlaTT @sjameelTT
 tests/sweep_framework/sweeps/matmul/  @bbradelTT @asandhupatlaTT @sjameelTT
@@ -134,7 +134,7 @@ tests/ttnn/distributed/ @cfjchu @ayerofieiev-tt @dmakoviichuk-tt @omilyutin-tt
 # models
 /models/ @uaydonat
 /models/*/**
-models/conv_on_device_utils*.py @mywoodstock @shwetankTT @sankarmanoj-tt
+models/conv_on_device_utils*.py @tenstorrent/metalium-developers-convolutions
 functional_*/ @uaydonat @esmalTT
 models/demos @uaydonat
 models/demos/metal_BERT_large_11 @tt-aho @TT-BrianLiu
@@ -146,7 +146,7 @@ models/demos/falcon7b_common @skhorasganiTT @djordje-tt @uaydonat
 models/demos/wormhole/mamba @esmalTT @uaydonat @kpaigwar
 models/demos/wormhole/falcon7b @skhorasganiTT @djordje-tt @uaydonat
 models/demos/wormhole/mistral7b @yieldthought @uaydonat @mtairum
-models/demos/wormhole/stable_diffusion @esmalTT @uaydonat @mywoodstock
+models/demos/wormhole/stable_diffusion @esmalTT @uaydonat @tenstorrent/metalium-developers-convolutions
 models/demos/t3000/falcon40b @uaydonat @djordje-tt @johanna-rock-tt
 models/demos/t3000/falcon7b @skhorasganiTT @djordje-tt @uaydonat
 models/demos/t3000/llama2_70b @cglagovichTT @uaydonat @johanna-rock-tt @djordje-tt @kpaigwar
@@ -155,10 +155,10 @@ models/demos/t3000/mixtral8x7b @yieldthought @mtairum @uaydonat
 models/demos/tg/llama3_70b @cglagovichTT @uaydonat @johanna-rock-tt @djordje-tt @kpaigwar
 models/demos/tg/falcon7b @skhorasganiTT @djordje-tt @uaydonat
 models/demos/grayskull @uaydonat
-models/demos/yolov4 @dvartaniansTT @shwetankTT
-models/demos/wormhole/yolov4 @dvartaniansTT @shwetankTT
-models/demos/**/*resnet* @mywoodstock @shwetankTT @tt-aho
-models/experimental/functional_unet @esmalTT @uaydonat @mywoodstock
+models/demos/yolov4 @dvartaniansTT @tenstorrent/metalium-developers-convolutions
+models/demos/wormhole/yolov4 @dvartaniansTT @tenstorrent/metalium-developers-convolutions
+models/demos/**/*resnet*  @tt-aho @tenstorrent/metalium-developers-convolutions
+models/experimental/functional_unet @esmalTT @uaydonat @tenstorrent/metalium-developers-convolutions
 models/perf/ @uaydonat
 models/perf/perf_report.py @yieldthought @uaydonat
 models/perf/benchmarking_utils.py @skhorasganiTT
@@ -169,9 +169,9 @@ docs/source/ttnn/ttnn/dependencies/tt_lib.rst @eyonland @patrickroberts @yan-zar
 docs/source/ttnn/ @eyonland @patrickroberts @yan-zaretskiy @ayerofieiev-tt @razorback3 @dongjin-na
 
 # misc
-tests/**/dtx/ @mywoodstock @sankarmanoj-tt
-tests/**/*test*conv*.py @mywoodstock @sankarmanoj-tt
-tests/python_api_testing/conv/ @mywoodstock @sankarmanoj-tt
+tests/**/dtx/ @tenstorrent/metalium-developers-convolutions
+tests/**/*test*conv*.py @tenstorrent/metalium-developers-convolutions
+tests/ttnn/unit_tests/operations/convolution @tenstorrent/metalium-developers-convolutions
 tests/python_api_testing/unit_testing/fallback_ops @tt-aho
 scripts/profiler/ @mo-tenstorrent
 scripts/docker @tenstorrent/metalium-developers-infra

diff --git a/docs/source/ttnn/ttnn/api.rst b/docs/source/ttnn/ttnn/api.rst
@@ -444,6 +444,7 @@ Normalization
    ttnn.group_norm
    ttnn.layer_norm
    ttnn.rms_norm
+   ttnn.batch_norm
 
 
 Moreh Operations

diff --git a/tests/sweep_framework/sweep_utils/reduction_common.py b/tests/sweep_framework/sweep_utils/reduction_common.py
@@ -55,3 +55,47 @@ def run_sum(
     pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
     # print(f"input_shape {input_shape} pcc {pcc}")
     return [pcc, e2e_perf]
+
+
+def run_prod(
+    input_shape,
+    dim,
+    keepdim,
+    input_a_dtype,
+    input_a_layout,
+    input_a_memory_config,
+    output_memory_config,
+    device,
+) -> list:
+    """Run ``ttnn.prod`` on a random input and compare it with ``torch.prod``.
+
+    Args:
+        input_shape: Shape passed to the input generator; its length is also
+            used to normalize ``dim``.
+        dim: Dimension to reduce over; taken modulo ``len(input_shape)``.
+        keepdim: Forwarded to both ``torch.prod`` and ``ttnn.prod``.
+        input_a_dtype: ttnn dtype used to generate and convert the input.
+        input_a_layout: Layout passed to ``ttnn.from_torch``.
+        input_a_memory_config: Memory config passed to ``ttnn.from_torch``.
+        output_memory_config: Memory config passed to ``ttnn.prod``.
+        device: Device the input tensor is placed on.
+
+    Returns:
+        ``[pcc, e2e_perf]``: the ``check_with_pcc`` result (threshold 0.999) and
+        the value returned by ``stop_measuring_time``. For float32 on grayskull
+        the function returns ``[(False, <reason>), 0]`` without running the op.
+
+    Raises:
+        AssertionError: If the ttnn output shape differs from the torch one.
+    """
+    # Seed torch from Python's `random` module, so the generated data follows
+    # whatever state that module is in when this function is called.
+    data_seed = random.randint(0, 20000000)
+    torch.manual_seed(data_seed)
+
+    # Report an unsupported configuration as a failed result instead of running it.
+    if input_a_dtype == ttnn.float32 and ttnn.device.is_grayskull(device):
+        return [(False, "Dest Fp32 mode is not supported for arch grayskull"), 0]
+
+    torch_input_tensor_a = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
+    )(input_shape)
+
+    # Map a negative dim onto its non-negative equivalent before using it in both backends.
+    dim = dim % len(input_shape)
+
+    # Reference result computed with torch.
+    torch_output_tensor = torch.prod(torch_input_tensor_a, dim=dim, keepdim=keepdim)
+
+    input_tensor_a = ttnn.from_torch(
+        torch_input_tensor_a,
+        dtype=input_a_dtype,
+        layout=input_a_layout,
+        device=device,
+        memory_config=input_a_memory_config,
+    )
+
+    # The timed region covers the op and the conversion back to torch.
+    start_time = start_measuring_time()
+    result = ttnn.prod(input_tensor_a, dim=dim, keepdim=keepdim, memory_config=output_memory_config)
+    output_tensor = ttnn.to_torch(result)
+    e2e_perf = stop_measuring_time(start_time)
+
+    pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
+    # A shape mismatch raises here, after the PCC result has already been computed.
+    assert len(output_tensor.shape) == len(torch_output_tensor.shape)
+    assert output_tensor.shape == torch_output_tensor.shape
+    # print(f"input_shape {input_shape} pcc {pcc}")
+    return [pcc, e2e_perf]
diff --git a/tests/sweep_framework/sweeps/eltwise/unary/abs/abs.py b/tests/sweep_framework/sweeps/eltwise/unary/abs/abs.py
@@ -6,77 +6,71 @@
 from functools import partial
 
 import torch
-import random
 import ttnn
 from tests.sweep_framework.sweep_utils.utils import gen_shapes
 from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
 
 from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
 from models.utility_functions import torch_random
 
-# Override the default timeout in seconds for hang detection.
-TIMEOUT = 30
-
-random.seed(0)
-
 
 # Parameters provided to the test vector generator are defined here.
 # They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
-# Each suite has a key name (in this case "suite_1" and "suite_2") which will associate the test vectors to this specific suite of inputs.
+# Each suite has a key name (in this case "nightly") which will associate the test vectors to this specific suite of inputs.
 # Developers can create their own generator functions and pass them to the parameters as inputs.
 parameters = {
     "nightly": {
-        "input_shape": gen_shapes([1, 1, 32, 32], [6, 12, 256, 256], [1, 1, 32, 32], 128),
-        "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
-        "input_a_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
-        "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "input_shape": gen_shapes([1, 1, 32, 32], [6, 12, 256, 256], [1, 1, 32, 32], 16)
+        + gen_shapes([1, 32, 32], [12, 256, 256], [1, 32, 32], 16)
+        + gen_shapes([32, 32], [256, 256], [32, 32], 32),
+        "input_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+        "input_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
+        "input_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
         "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
-    },
+    }
 }
 
 
 # Invalidate vector is called during the generation phase where each vector will be passed in.
 # If invalidated, the vector will still be stored but will be skipped.
 # Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
 def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
-    if test_vector["input_a_layout"] == ttnn.ROW_MAJOR_LAYOUT:
-        return True, "Row Major layout is not supported"
+    if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT or test_vector["input_dtype"] == ttnn.bfloat8_b:
+        return True, "ROW_MAJOR_LAYOUT and ttnn.bfloat8_b are not supported"
     return False, None
 
 
 # This is the run instructions for the test, defined by the developer.
 # The run function must take the above-defined parameters as inputs.
 # The runner will call this run function with each test vector, and the returned results from this function will be stored.
-# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
+# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
 def run(
     input_shape,
-    input_a_dtype,
-    input_a_layout,
-    input_a_memory_config,
+    input_dtype,
+    input_layout,
+    input_memory_config,
     output_memory_config,
     *,
     device,
 ) -> list:
-    data_seed = random.randint(0, 20000000)
-    torch.manual_seed(data_seed)
+    torch.manual_seed(0)
 
-    torch_input_tensor_a = gen_func_with_cast_tt(
-        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype
+    torch_input_tensor = gen_func_with_cast_tt(
+        partial(torch_random, low=-100, high=100, dtype=torch.float32), input_dtype
     )(input_shape)
 
-    golden_function = ttnn.get_golden_function(ttnn.abs)
-    torch_output_tensor = golden_function(torch_input_tensor_a)
+    torch_output_tensor = torch.abs(torch_input_tensor)
 
-    input_tensor_a = ttnn.from_torch(
-        torch_input_tensor_a,
-        dtype=input_a_dtype,
-        layout=input_a_layout,
+    input_tensor = ttnn.from_torch(
+        torch_input_tensor,
+        dtype=input_dtype,
+        layout=input_layout,
         device=device,
-        memory_config=input_a_memory_config,
+        memory_config=input_memory_config,
     )
 
     start_time = start_measuring_time()
-    result = ttnn.abs(input_tensor_a, memory_config=output_memory_config)
+    result = ttnn.abs(input_tensor, memory_config=output_memory_config)
     output_tensor = ttnn.to_torch(result)
     e2e_perf = stop_measuring_time(start_time)
 

diff --git a/tests/sweep_framework/sweeps/eltwise/unary/acos/acos.py b/tests/sweep_framework/sweeps/eltwise/unary/acos/acos.py
@@ -0,0 +1,77 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+from functools import partial
+
+import torch
+import ttnn
+from tests.sweep_framework.sweep_utils.utils import gen_shapes
+from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+
+# Parameters provided to the test vector generator are defined here.
+# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
+# Each suite has a key name (in this case "nightly") which will associate the test vectors to this specific suite of inputs.
+# Developers can create their own generator functions and pass them to the parameters as inputs.
+parameters = {
+    "nightly": {
+        # Shapes are concatenated from three gen_shapes calls whose bounds have
+        # four, three and two dimensions respectively.
+        # NOTE(review): presumably the last argument is the number of shapes sampled; confirm against gen_shapes.
+        "input_shape": gen_shapes([1, 1, 32, 32], [6, 12, 256, 256], [1, 1, 32, 32], 16)
+        + gen_shapes([1, 32, 32], [12, 256, 256], [1, 32, 32], 16)
+        + gen_shapes([32, 32], [256, 256], [32, 32], 32),
+        # bfloat8_b and ROW_MAJOR_LAYOUT vectors are generated here but rejected by invalidate_vector below.
+        "input_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
+        "input_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
+        "input_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+        "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
+    }
+}
+
+
+# Invalidate vector is called during the generation phase where each vector will be passed in.
+# If invalidated, the vector will still be stored but will be skipped.
+# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
+def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
+    # Either condition alone invalidates the vector: a ROW_MAJOR_LAYOUT input
+    # layout or a bfloat8_b input dtype. The returned reason names both even
+    # when only one of them applies.
+    if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT or test_vector["input_dtype"] == ttnn.bfloat8_b:
+        return True, "ROW_MAJOR_LAYOUT and ttnn.bfloat8_b are not supported"
+    return False, None
+
+
+# This is the run instructions for the test, defined by the developer.
+# The run function must take the above-defined parameters as inputs.
+# The runner will call this run function with each test vector, and the returned results from this function will be stored.
+# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
+def run(
+    input_shape,
+    input_dtype,
+    input_layout,
+    input_memory_config,
+    output_memory_config,
+    *,
+    device,
+) -> list:
+    """Run ``ttnn.acos`` on a random input and compare it with ``torch.acos``.
+
+    Args:
+        input_shape: Shape passed to the input generator.
+        input_dtype: ttnn dtype used to generate and convert the input.
+        input_layout: Layout passed to ``ttnn.from_torch``.
+        input_memory_config: Memory config passed to ``ttnn.from_torch``.
+        output_memory_config: Memory config passed to ``ttnn.acos``.
+        device: Keyword-only; device the input tensor is placed on.
+
+    Returns:
+        ``[pcc_result, e2e_perf]``: the ``check_with_pcc`` result (threshold
+        0.999) and the value returned by ``stop_measuring_time``.
+    """
+    # Fixed seed: every invocation seeds torch identically.
+    torch.manual_seed(0)
+
+    # Inputs are requested with low=-1, high=1; acos is only real-valued on [-1, 1].
+    torch_input_tensor = gen_func_with_cast_tt(partial(torch_random, low=-1, high=1, dtype=torch.float32), input_dtype)(
+        input_shape
+    )
+
+    # Reference result computed with torch.
+    torch_output_tensor = torch.acos(torch_input_tensor)
+
+    input_tensor = ttnn.from_torch(
+        torch_input_tensor,
+        dtype=input_dtype,
+        layout=input_layout,
+        device=device,
+        memory_config=input_memory_config,
+    )
+
+    # The timed region covers the op and the conversion back to torch.
+    start_time = start_measuring_time()
+    result = ttnn.acos(input_tensor, memory_config=output_memory_config)
+    output_tensor = ttnn.to_torch(result)
+    e2e_perf = stop_measuring_time(start_time)
+
+    return [check_with_pcc(torch_output_tensor, output_tensor, 0.999), e2e_perf]