From a409c8a055ade8790ca42384e6dd5e4882497c56 Mon Sep 17 00:00:00 2001 From: Akhmed Rakhmati Date: Wed, 21 Feb 2024 22:52:48 +0000 Subject: [PATCH] #4003: infer that 1d systolic array should be used when running ttnn.matmul and ttnn.linear --- ttnn/ttnn/operations/matmul.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/ttnn/ttnn/operations/matmul.py b/ttnn/ttnn/operations/matmul.py index 0fa9c5e54830..cda39c750aee 100644 --- a/ttnn/ttnn/operations/matmul.py +++ b/ttnn/ttnn/operations/matmul.py @@ -194,15 +194,21 @@ def _get_matmul_program_config( input_tensor_a_memory_config = ttnn.get_memory_config(input_tensor_a) input_tensor_b_memory_config = ttnn.get_memory_config(input_tensor_b) - if use_1d_systolic_array is not None: - # TODO: infer if 1D systolic array can be used - if use_1d_systolic_array: - return create_matmul_1d_systolic_array_config( - input_shape_a=input_tensor_a.shape, - input_shape_b=input_tensor_b.shape, - max_core_grid=core_grid, - activation=activation, - ) + if use_1d_systolic_array is None: + # Infer use_1d_systolic_array based on how rectangular the output matrix is + height_width_ratio = (math.prod(batch_shape_a) * m_size) / n_size + if height_width_ratio < 1: + height_width_ratio = 1 / height_width_ratio + # 4 is an arbitrary choice. It should probably be inferred based on the device core grid + use_1d_systolic_array = height_width_ratio > 4 + + if use_1d_systolic_array: + return create_matmul_1d_systolic_array_config( + input_shape_a=input_tensor_a.shape, + input_shape_b=input_tensor_b.shape, + max_core_grid=core_grid, + activation=activation, + ) # TODO: clean up the code below by mvoing it to separate create_*_config functions