Skip to content

Commit

Permalink
#4003: infer that 1d systolic array should be used when running ttnn.…
Browse files Browse the repository at this point in the history
…matmul and ttnn.linear
  • Loading branch information
arakhmati committed Feb 21, 2024
1 parent 56c36ca commit a409c8a
Showing 1 changed file with 15 additions and 9 deletions.
24 changes: 15 additions & 9 deletions ttnn/ttnn/operations/matmul.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,15 +194,21 @@ def _get_matmul_program_config(
input_tensor_a_memory_config = ttnn.get_memory_config(input_tensor_a)
input_tensor_b_memory_config = ttnn.get_memory_config(input_tensor_b)

if use_1d_systolic_array is not None:
# TODO: infer if 1D systolic array can be used
if use_1d_systolic_array:
return create_matmul_1d_systolic_array_config(
input_shape_a=input_tensor_a.shape,
input_shape_b=input_tensor_b.shape,
max_core_grid=core_grid,
activation=activation,
)
if use_1d_systolic_array is None:
# Infer use_1d_systolic_array based on how rectangular the output matrix is
height_width_ratio = (math.prod(batch_shape_a) * m_size) / n_size
if height_width_ratio < 1:
height_width_ratio = 1 / height_width_ratio
# 4 is an arbitrary choice. It should probably be inferred based on the device core grid
use_1d_systolic_array = height_width_ratio > 4

if use_1d_systolic_array:
return create_matmul_1d_systolic_array_config(
input_shape_a=input_tensor_a.shape,
input_shape_b=input_tensor_b.shape,
max_core_grid=core_grid,
activation=activation,
)

# TODO: clean up the code below by moving it to separate create_*_config functions

Expand Down

0 comments on commit a409c8a

Please sign in to comment.