#0: Conv2dConfig refactor
sankarmanoj-tt committed Nov 19, 2024
1 parent f6281d2 commit 21135db
Showing 8 changed files with 51 additions and 51 deletions.
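
The files shown below all apply the same change: the compute-kernel settings (math fidelity, math approximation mode, fp32 destination accumulation, packer L1 accumulation) are removed from ttnn.Conv2dConfig and passed to ttnn.conv2d through a separate compute_config argument instead. A minimal migration sketch, using only names that appear in this diff (the dtype values are illustrative, and exact signatures may vary between ttnn versions):

import ttnn

# Before this commit: compute-kernel settings lived on Conv2dConfig.
old_conv_config = ttnn.Conv2dConfig(
    dtype=ttnn.bfloat16,
    weights_dtype=ttnn.bfloat16,
    math_fidelity=ttnn.MathFidelity.LoFi,
    math_approx_mode_enabled=True,
    fp32_dest_acc_enabled=False,
    packer_l1_accum_enabled=False,
)

# After this commit: Conv2dConfig keeps only tensor and layout options, and the
# compute-kernel settings move into a config of their own, passed to ttnn.conv2d
# via the new compute_config argument.
conv_config = ttnn.Conv2dConfig(
    dtype=ttnn.bfloat16,
    weights_dtype=ttnn.bfloat16,
)
compute_config = ttnn.GetComputeKernelConfig(
    math_fidelity=ttnn.MathFidelity.LoFi,
    math_approx_mode=True,
    fp32_dest_acc_en=False,
    packer_l1_acc=False,
)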
11 changes: 7 additions & 4 deletions models/demos/segformer/tt/common.py
@@ -40,12 +40,8 @@ def __call__(self, device, input_tensor):
conv_config = ttnn.Conv2dConfig(
dtype=self.dtype,
weights_dtype=ttnn.bfloat16,
math_fidelity=ttnn.MathFidelity.LoFi,
activation=self.activation,
shard_layout=self.shard_layout,
math_approx_mode_enabled=True,
fp32_dest_acc_enabled=False,
packer_l1_accum_enabled=False,
input_channels_alignment=16 if input_tensor.shape[3] < 16 else 32,
transpose_shards=False,
reshard_if_not_optimal=self.reshard,
@@ -54,6 +50,12 @@ def __call__(self, device, input_tensor):
enable_act_double_buffer=True,
enable_split_reader=False,
)
compute_config = ttnn.GetComputeKernelConfig(
math_fidelity=ttnn.MathFidelity.LoFi,
math_approx_mode=True,
fp32_dest_acc_en=False,
packer_l1_acc=False,
)
if self.act_block_h is not None:
conv_config.act_block_h_override = self.act_block_h

@@ -71,6 +73,7 @@ def __call__(self, device, input_tensor):
input_height=input_tensor.shape[1],
input_width=input_tensor.shape[2],
conv_config=conv_config,
compute_config=compute_config,
groups=self.groups,
)
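
The conv call sites in the remaining files apply the same split, but build the compute config inline from the model config, setting only math_fidelity and leaving the other fields at their defaults. A hypothetical helper (not part of this commit) illustrating that pattern:

import ttnn

# Hypothetical helper: mirrors the call sites below, which construct the
# per-conv compute config from model_config alone.
def model_compute_config(model_config):
    # Only math_fidelity is supplied; math_approx_mode, fp32_dest_acc_en and
    # packer_l1_acc fall back to the GetComputeKernelConfig defaults.
    return ttnn.GetComputeKernelConfig(math_fidelity=model_config["MATH_FIDELITY"])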

Next changed file (name not shown):
@@ -181,12 +181,12 @@ def run_downsample_if_req(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=shard_layout,
deallocate_activation=True,
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -228,13 +228,13 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -291,7 +291,6 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
@@ -301,6 +300,7 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -322,12 +322,12 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -538,12 +538,12 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -844,12 +844,12 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
Next changed file (name not shown):
@@ -178,14 +178,14 @@ def run_downsample_if_req(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
deallocate_activation=True,
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -225,13 +225,13 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -286,7 +286,6 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
@@ -296,6 +295,7 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -317,12 +317,12 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -532,12 +532,12 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -835,12 +835,12 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
Next changed file (name not shown):
@@ -179,14 +179,14 @@ def run_downsample_if_req(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
deallocate_activation=True,
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -226,13 +226,13 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -288,7 +288,6 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
@@ -298,6 +297,7 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -319,12 +319,12 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -534,12 +534,12 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -865,12 +865,12 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
Next changed file (name not shown):
@@ -179,7 +179,6 @@ def run_downsample_if_req(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
@@ -188,6 +187,7 @@ def run_downsample_if_req(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -232,14 +232,14 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -337,7 +337,6 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
@@ -348,6 +347,7 @@ def __call__(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -369,13 +369,13 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -597,13 +597,13 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
reallocate_halo_output=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -931,12 +931,12 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
(Remaining changed files not shown.)
