From e7cd350de921a4aa3d5ee2b2a75ee34f9fb48f2a Mon Sep 17 00:00:00 2001 From: Jay Kruer Date: Thu, 21 Nov 2024 17:16:31 -0800 Subject: [PATCH] #0: Restrict forced single core untilize on BH to non-sharded cases (#15353) ### Problem description Forcing single-core untilize on Blackhole (BH) for sharded tensors was causing failures in recently enabled tests in the BH post-commit CI. ### What's changed This commit adds another check so that single core is forced on BH only when neither the input nor the output tensor is sharded. ### Checklist - [ ] Post commit CI passes - [x] Blackhole Post commit (https://github.com/tenstorrent/tt-metal/actions/runs/11963996332) - [ ] Model regression CI testing passes (if applicable) - [ ] Device performance regression CI testing passes (if applicable) - [ ] New/Existing tests provide coverage for changes --- .../operations/data_movement/untilize/device/untilize_op.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ttnn/cpp/ttnn/operations/data_movement/untilize/device/untilize_op.cpp b/ttnn/cpp/ttnn/operations/data_movement/untilize/device/untilize_op.cpp index bbf60f9ef5f..c9c2ff7431a 100644 --- a/ttnn/cpp/ttnn/operations/data_movement/untilize/device/untilize_op.cpp +++ b/ttnn/cpp/ttnn/operations/data_movement/untilize/device/untilize_op.cpp @@ -110,8 +110,9 @@ operation::ProgramWithCallbacks Untilize::create_program( const auto& input_tensor_a = input_tensors.at(0); auto& output_tensor = output_tensors.at(0); auto device_is_blackhole = input_tensor_a.device()->arch() == tt::ARCH::BLACKHOLE; + auto in_or_out_sharded = input_tensor_a.memory_config().is_sharded() || output_tensor.memory_config().is_sharded(); // FIXME: Remove this restriction once multicore untilize is supported on blackhole - if (this->use_multicore && !device_is_blackhole) { + if (this->use_multicore && (in_or_out_sharded || !device_is_blackhole)) { return detail::untilize_multi_core(input_tensor_a, output_tensor, this->use_pack_untilize, this->fp32_dest_acc_en); } else { return 
detail::untilize_single_core(input_tensor_a, output_tensor, this->use_pack_untilize, this->fp32_dest_acc_en);