#14179: Formatting conv2d results.
Co-authored-by: Lewis Panos <[email protected]>
shwetankTT and LPanosTT committed Dec 9, 2024
1 parent 4b6e844 commit c12bf8e
Showing 24 changed files with 216 additions and 74 deletions.
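For reference, the recurring change in every file below is the ttnn.conv2d return value: instead of always unpacking a five-element result, each call site now requests only the extras it needs via the return_output_dim and return_weights_and_bias flags. Below is a minimal sketch of the four calling patterns used in this commit; the elided keyword arguments (input/weight tensors, channels, kernel, stride, padding, configs) are placeholders, not the real argument lists, so see the individual call sites for those.

# Output tensor only (both flags False).
out = ttnn.conv2d(..., return_output_dim=False, return_weights_and_bias=False)

# Output tensor plus the output spatial dims.
out, [out_height, out_width] = ttnn.conv2d(..., return_output_dim=True, return_weights_and_bias=False)

# Output tensor plus the weight/bias tensors prepared on device.
out, [weights_device, bias_device] = ttnn.conv2d(..., return_output_dim=False, return_weights_and_bias=True)

# Both extras at once.
out, [out_height, out_width], [weights_device, bias_device] = ttnn.conv2d(
    ..., return_output_dim=True, return_weights_and_bias=True
)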
8 changes: 6 additions & 2 deletions models/demos/convnet_mnist/tt/convnet_mnist.py
@@ -35,7 +35,7 @@ def convnet_mnist(
packer_l1_acc=False,
)
x = ttnn.to_layout(input_tensor, layout=ttnn.ROW_MAJOR_LAYOUT)
[x, out_height, out_width, weights_device, bias_device] = ttnn.conv2d(
x = ttnn.conv2d(
input_tensor=x,
weight_tensor=parameters.conv1.weight,
in_channels=1,
@@ -53,6 +53,8 @@ def convnet_mnist(
conv_op_cache={},
debug=True,
groups=1,
return_output_dim=False,
return_weights_and_bias=False,
)
x = ttnn.relu(x)

@@ -79,7 +81,7 @@ def convnet_mnist(
dilation=[1, 1],
)

[x, out_height, out_width, weights_device, bias_device] = ttnn.conv2d(
x, [out_height, out_width] = ttnn.conv2d(
input_tensor=x,
weight_tensor=parameters.conv2.weight,
in_channels=32,
@@ -96,6 +98,8 @@ def convnet_mnist(
conv_op_cache={},
debug=False,
groups=1,
return_output_dim=True,
return_weights_and_bias=False,
)

x = ttnn.relu(x)
4 changes: 3 additions & 1 deletion models/demos/segformer/tt/common.py
@@ -60,7 +60,7 @@ def __call__(self, device, input_tensor):
if self.act_block_h is not None:
conv_config.act_block_h_override = self.act_block_h

[output_tensor, _out_height, _out_width, self.weights, self.bias] = ttnn.conv2d(
[output_tensor, [_out_height, _out_width]] = ttnn.conv2d(
input_tensor=input_tensor,
weight_tensor=self.weights,
bias_tensor=self.bias,
@@ -76,6 +76,8 @@ def __call__(self, device, input_tensor):
conv_config=conv_config,
compute_config=compute_config,
groups=self.groups,
return_output_dim=True,
return_weights_and_bias=False,
)

return output_tensor, _out_height, _out_width
4 changes: 3 additions & 1 deletion models/demos/squeezebert/tt/ttnn_functional_squeezebert.py
@@ -90,7 +90,7 @@ def ttnn_conv1d(
packer_l1_acc=packer_l1_acc,
)

[tt_output_tensor_on_device, out_length, weights_device, bias_device] = ttnn.Conv1d(
[tt_output_tensor_on_device, out_length, [weights_device, bias_device]] = ttnn.Conv1d(
input_tensor=tt_input_tensor,
weight_tensor=weights,
in_channels=tt_input_tensor.shape[-1],
@@ -107,6 +107,8 @@ def ttnn_conv1d(
conv_op_cache={},
debug=debug,
groups=groups,
return_output_dim=True,
return_weights_and_bias=True,
)

tt_output_tensor_on_device = ttnn.squeeze(tt_output_tensor_on_device, 0)
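ttnn.Conv1d follows the same flag convention in this hunk, except the returned dimension is a single output length rather than a height/width pair; a rough sketch with the argument list elided as above:

# With both flags set, Conv1d returns the output tensor, its length, and the
# on-device weights/bias, unpacked exactly as in the call above.
[tt_output_tensor_on_device, out_length, [weights_device, bias_device]] = ttnn.Conv1d(
    ..., return_output_dim=True, return_weights_and_bias=True
)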
@@ -167,7 +167,7 @@ def run_downsample_if_req(
shard_layout = (
ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
)
ds_out, _, _, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
ds_out, [self.ds_conv_weight_tensor, self.ds_conv_bias_tensor] = ttnn.conv2d(
input_tensor=x,
weight_tensor=self.ds_conv_weight_tensor,
in_channels=self.ds_conv_input_channels,
@@ -192,6 +192,8 @@ def run_downsample_if_req(
device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
),
conv_op_cache=conv_op_cache,
return_output_dim=False,
return_weights_and_bias=True,
)
ttnn.deallocate(x)
ds_out = ttnn.reallocate(ds_out)
@@ -216,7 +218,7 @@ def __call__(
# conv1 is 1x1 conv
# print("Running conv1")
module_input_height = input_height
out, input_height, input_width, self.conv1_weight_tensor, self.conv1_bias_tensor = ttnn.conv2d(
out, [input_height, input_width], [self.conv1_weight_tensor, self.conv1_bias_tensor] = ttnn.conv2d(
input_tensor=x,
weight_tensor=self.conv1_weight_tensor,
in_channels=self.conv1_input_channels,
@@ -242,6 +244,8 @@ def __call__(
device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
),
conv_op_cache=conv_op_cache,
return_output_dim=True,
return_weights_and_bias=True,
)

act_block_h_override = 0
@@ -281,7 +285,7 @@ def __call__(
)
# if ds_out_mem_config and ds_out_mem_config != ttnn.get_memory_config(out):
# out = ttnn.to_memory_config(out, ds_out_mem_config)
out, input_height, input_width, self.conv2_weight_tensor, self.conv2_bias_tensor = ttnn.conv2d(
out, [input_height, input_width], [self.conv2_weight_tensor, self.conv2_bias_tensor] = ttnn.conv2d(
input_tensor=out,
weight_tensor=self.conv2_weight_tensor,
in_channels=self.conv2_input_channels,
@@ -310,11 +314,13 @@ def __call__(
device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
),
conv_op_cache=conv_op_cache,
return_output_dim=True,
return_weights_and_bias=True,
)

# conv3 is 1x1 conv
# print("Running conv3")
out, _, _, self.conv3_weight_tensor, self.conv3_bias_tensor = ttnn.conv2d(
out, [self.conv3_weight_tensor, self.conv3_bias_tensor] = ttnn.conv2d(
input_tensor=out,
weight_tensor=self.conv3_weight_tensor,
in_channels=self.conv3_input_channels,
@@ -339,6 +345,8 @@ def __call__(
device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
),
conv_op_cache=conv_op_cache,
return_weights_and_bias=True,
return_output_dim=False,
)

if not self.run_downsample_before_conv2:
@@ -554,7 +562,7 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
input_tensor, device=device, memory_config=self.grayskull_conv1_input_memory_config
)

x, x_height, x_width, self.conv1_weight_tensor, self.conv1_bias_tensor = ttnn.conv2d(
x, [x_height, x_width], [self.conv1_weight_tensor, self.conv1_bias_tensor] = ttnn.conv2d(
input_tensor=input_tensor,
weight_tensor=self.conv1_weight_tensor,
in_channels=self.conv1_input_channels,
@@ -579,6 +587,8 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
),
conv_op_cache=conv_op_cache,
return_output_dim=True,
return_weights_and_bias=True,
)
# Relu is fused with conv1

@@ -867,7 +877,7 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
input_tensor, device=device, memory_config=self.grayskull_conv1_input_memory_config
)

x, x_height, x_width, self.conv1_weight_tensor, self.conv1_bias_tensor = ttnn.conv2d(
x, [x_height, x_width], [self.conv1_weight_tensor, self.conv1_bias_tensor] = ttnn.conv2d(
input_tensor=input_tensor,
weight_tensor=self.conv1_weight_tensor,
in_channels=self.conv1_input_channels,
@@ -892,6 +902,8 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
),
conv_op_cache=conv_op_cache,
return_output_dim=True,
return_weights_and_bias=True,
)
# Relu is fused with conv1

@@ -160,7 +160,7 @@ def run_downsample_if_req(
):
if self.downsample:
logger.debug(f"Running downsample")
ds_out, _, _, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
ds_out, [self.ds_conv_weight_tensor, self.ds_conv_bias_tensor] = ttnn.conv2d(
input_tensor=x,
weight_tensor=self.ds_conv_weight_tensor,
in_channels=self.ds_conv_input_channels,
@@ -198,6 +198,8 @@ def run_downsample_if_req(
packer_l1_acc=packer_l1_accum_enabled,
),
conv_op_cache=conv_op_cache,
return_output_dim=False,
return_weights_and_bias=True,
)
ttnn.deallocate(x)
ds_out = ttnn.reallocate(ds_out)
@@ -229,7 +231,7 @@ def __call__(
# conv1 is 1x1 conv
logger.debug(f"Running conv1")
module_input_height = input_height
out, input_height, input_width, self.conv1_weight_tensor, self.conv1_bias_tensor = ttnn.conv2d(
out, [input_height, input_width], [self.conv1_weight_tensor, self.conv1_bias_tensor] = ttnn.conv2d(
input_tensor=x,
weight_tensor=self.conv1_weight_tensor,
in_channels=self.conv1_input_channels,
@@ -258,6 +260,8 @@ def __call__(
packer_l1_acc=packer_l1_acc,
),
conv_op_cache=conv_op_cache,
return_output_dim=True,
return_weights_and_bias=True,
)

act_block_h_override = 0
@@ -313,7 +317,7 @@ def __call__(

reallocate_halo_output = batch_size == 20
logger.debug(f"Running conv2")
out, input_height, input_width, self.conv2_weight_tensor, self.conv2_bias_tensor = ttnn.conv2d(
out, [input_height, input_width], [self.conv2_weight_tensor, self.conv2_bias_tensor] = ttnn.conv2d(
input_tensor=out,
weight_tensor=self.conv2_weight_tensor,
in_channels=self.conv2_input_channels,
@@ -349,6 +353,8 @@ def __call__(
packer_l1_acc=packer_l1_acc,
),
conv_op_cache=conv_op_cache,
return_output_dim=True,
return_weights_and_bias=True,
)

logger.debug(
@@ -367,7 +373,7 @@ def __call__(

# conv3 is 1x1 conv
logger.debug(f"Running conv3")
out, _, _, self.conv3_weight_tensor, self.conv3_bias_tensor = ttnn.conv2d(
out, [self.conv3_weight_tensor, self.conv3_bias_tensor] = ttnn.conv2d(
input_tensor=out,
weight_tensor=self.conv3_weight_tensor,
in_channels=self.conv3_input_channels,
@@ -395,6 +401,8 @@ def __call__(
packer_l1_acc=packer_l1_acc,
),
conv_op_cache=conv_op_cache,
return_output_dim=False,
return_weights_and_bias=True,
)

if not run_downsample_before_conv2:
@@ -734,7 +742,7 @@ def run(self, input_tensor, device, ops_parallel_config, conv_op_cache={}) -> tt
logger.debug(f"==== first conv")

# first conv
x, x_height, x_width, self.conv1_weight_tensor, self.conv1_bias_tensor = ttnn.conv2d(
x, [x_height, x_width], [self.conv1_weight_tensor, self.conv1_bias_tensor] = ttnn.conv2d(
input_tensor=fold_output_tensor,
weight_tensor=self.conv1_weight_tensor,
in_channels=self.conv1_input_channels,
@@ -750,6 +758,8 @@ def run(self, input_tensor, device, ops_parallel_config, conv_op_cache={}) -> tt
conv_config=self.conv1_config,
compute_config=self.conv1_compute_config,
conv_op_cache=conv_op_cache,
return_output_dim=True,
return_weights_and_bias=True,
)
# Relu is fused with conv1
if self.batch_size == 20:
@@ -162,7 +162,7 @@ def run_downsample_if_req(
height_sharding=None,
):
if self.downsample:
ds_out, _, _, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
ds_out, [self.ds_conv_weight_tensor, self.ds_conv_bias_tensor] = ttnn.conv2d(
input_tensor=x,
weight_tensor=self.ds_conv_weight_tensor,
in_channels=self.ds_conv_input_channels,
@@ -189,6 +189,8 @@ def run_downsample_if_req(
device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
),
conv_op_cache=conv_op_cache,
return_output_dim=False,
return_weights_and_bias=True,
)
ttnn.deallocate(x)
ds_out = ttnn.reallocate(ds_out)
@@ -211,7 +213,7 @@ def __call__(
# conv1 is 1x1 conv
# print("Running conv1")
module_input_height = input_height
out, input_height, input_width, self.conv1_weight_tensor, self.conv1_bias_tensor = ttnn.conv2d(
out, [input_height, input_width], [self.conv1_weight_tensor, self.conv1_bias_tensor] = ttnn.conv2d(
input_tensor=x,
weight_tensor=self.conv1_weight_tensor,
in_channels=self.conv1_input_channels,
@@ -237,6 +239,8 @@ def __call__(
device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
),
conv_op_cache=conv_op_cache,
return_output_dim=True,
return_weights_and_bias=True,
)

act_block_h_override = 0
@@ -274,7 +278,7 @@ def __call__(
# self.conv1_input_channels == 256 and
# self.downsample
)
out, input_height, input_width, self.conv2_weight_tensor, self.conv2_bias_tensor = ttnn.conv2d(
out, [input_height, input_width], [self.conv2_weight_tensor, self.conv2_bias_tensor] = ttnn.conv2d(
input_tensor=out,
weight_tensor=self.conv2_weight_tensor,
in_channels=self.conv2_input_channels,
@@ -303,11 +307,13 @@ def __call__(
device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
),
conv_op_cache=conv_op_cache,
return_output_dim=True,
return_weights_and_bias=True,
)

# conv3 is 1x1 conv
# print("Running conv3")
out, _, _, self.conv3_weight_tensor, self.conv3_bias_tensor = ttnn.conv2d(
out, [self.conv3_weight_tensor, self.conv3_bias_tensor] = ttnn.conv2d(
input_tensor=out,
weight_tensor=self.conv3_weight_tensor,
in_channels=self.conv3_input_channels,
@@ -332,6 +338,8 @@ def __call__(
device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
),
conv_op_cache=conv_op_cache,
return_output_dim=False,
return_weights_and_bias=True,
)

if not self.run_downsample_before_conv2:
@@ -524,7 +532,7 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
elif batch_size == 20:
act_block_h_override = 640

x, x_height, x_width, self.conv1_weight_tensor, self.conv1_bias_tensor = ttnn.conv2d(
x, [x_height, x_width], [self.conv1_weight_tensor, self.conv1_bias_tensor] = ttnn.conv2d(
input_tensor=input_tensor,
weight_tensor=self.conv1_weight_tensor,
in_channels=self.conv1_input_channels,
@@ -549,6 +557,8 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
),
conv_op_cache=conv_op_cache,
return_output_dim=True,
return_weights_and_bias=True,
)
# Relu is fused with conv1

@@ -829,7 +839,7 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
else:
act_block_h_override = 0

x, x_height, x_width, self.conv1_weight_tensor, self.conv1_bias_tensor = ttnn.conv2d(
x, [x_height, x_width], [self.conv1_weight_tensor, self.conv1_bias_tensor] = ttnn.conv2d(
input_tensor=input_tensor,
weight_tensor=self.conv1_weight_tensor,
in_channels=self.conv1_input_channels,
@@ -854,6 +864,8 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
device.arch(), math_fidelity=self.model_config["MATH_FIDELITY"]
),
conv_op_cache=conv_op_cache,
return_output_dim=True,
return_weights_and_bias=True,
)
# Relu is fused with conv1
