Skip to content

Commit

Permalink
#16366: Changed default kernel_config_val for 32bit matmul (#16567)
Browse files Browse the repository at this point in the history
### Ticket
Link to Github Issue
#16366

### Problem description
FP32 PCC not accurate enough for a matmul with default kernel config

### What's changed
Enabled FP32 and disabled L1ACC for FP32

### Checklist
- [x] Post commit CI passes
- [x] Blackhole Post commit (if applicable)
- [x] Model regression CI testing passes (if applicable)
- [x] Device performance regression CI testing passes (if applicable)
- [x] **(For models and ops writers)** Full [new
models](https://github.com/tenstorrent/tt-metal/actions/workflows/full-new-models-suite.yaml)
tests passes
- [ ] New/Existing tests provide coverage for changes
  • Loading branch information
vsureshTT authored Jan 10, 2025
1 parent f479c4f commit 652490d
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 8 deletions.
15 changes: 15 additions & 0 deletions tests/ttnn/unit_tests/operations/test_matmul.py
Original file line number Diff line number Diff line change
Expand Up @@ -2111,3 +2111,18 @@ def test_optional_output_argument(device, n_size, c, m, k, n):
assert_with_pcc(torch_output_tensor, output, 0.999)
assert_with_pcc(torch_output_tensor, optional_output_tensor, 0.999)
assert_with_pcc(output, optional_output_tensor, 0.999)


def test_small_matmul_pcc(device):
    """Verify a small (1x2048) x (2048x1000) device matmul matches torch within PCC 0.99."""
    torch.manual_seed(0)
    expected_pcc = 0.99

    # Host-side reference computation.
    a_host = torch.rand([1, 2048])
    b_host = torch.rand([2048, 1000])
    reference = torch.matmul(a_host, b_host)

    # Device-side computation via ttnn tiled tensors.
    a_device = ttnn.from_torch(a_host, layout=ttnn.TILE_LAYOUT, device=device)
    b_device = ttnn.from_torch(b_host, layout=ttnn.TILE_LAYOUT, device=device)
    result = ttnn.to_torch(ttnn.matmul(a_device, b_device))

    assert_with_pcc(reference, result, pcc=expected_pcc)
16 changes: 8 additions & 8 deletions ttnn/cpp/ttnn/operations/matmul/device/matmul_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1225,13 +1225,6 @@ Matmul create_matmul_struct(
(input_tensor_b.get_dtype() == DataType::BFLOAT8_B || input_tensor_b.get_dtype() == DataType::BFLOAT4_B));
const auto increase_fidelity = !has_program_config && !has_user_grid && !are_inputs_low_precision_df;
auto math_fidelity = increase_fidelity ? MathFidelity::HiFi2 : MathFidelity::LoFi;
auto kernel_config_val = init_device_compute_kernel_config(
arch,
parameters.compute_kernel_config,
math_fidelity,
/*default_approx_mode=*/false,
/*default_fp32_acc=*/false,
/*default_l1_acc=*/true);
bool broadcast_batch =
parameters.bcast_batch.value_or(get_broadcast_batch(input_tensor_a, input_tensor_b, parameters.program_config));
TT_FATAL(!(has_user_grid && has_program_config), "Cannot use both user core grid/coordinates and a program config");
Expand Down Expand Up @@ -1267,7 +1260,14 @@ Matmul create_matmul_struct(
output_dtype = input_tensor_a.get_dtype();
}
}

bool is_float_32 = output_dtype==DataType::FLOAT32;
auto kernel_config_val = init_device_compute_kernel_config(
arch,
parameters.compute_kernel_config,
math_fidelity,
/*default_approx_mode=*/false,
/*default_fp32_acc=*/is_float_32,
/*default_l1_acc=*/!is_float_32);
auto in0_tile = input_tensor_a.get_tensor_spec().tile();
auto in1_tile = input_tensor_b.get_tensor_spec().tile();
tt::tt_metal::Tile output_tile = get_output_tile(output_mem_config, in0_tile, in1_tile, parameters.output_tile);
Expand Down

0 comments on commit 652490d

Please sign in to comment.