diff --git a/llk_lib/llk_math_reduce.h b/llk_lib/llk_math_reduce.h index f938078..3b015d1 100644 --- a/llk_lib/llk_math_reduce.h +++ b/llk_lib/llk_math_reduce.h @@ -172,7 +172,7 @@ inline void _llk_math_reduce_(const uint dst_index, bool narrow_tile = false, co TTI_GAPOOL(p_setrwc::CLR_NONE, p_gpool::DIM_16X16, ADDR_MOD_0, p_gpool::INDEX_DIS, 0); } } - if (!narrow_tile) { + if ((!narrow_tile) && (num_faces>1)) { TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); TTI_SETRWC(p_setrwc::CLR_AB, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D); diff --git a/llk_lib/llk_pack_untilize.h b/llk_lib/llk_pack_untilize.h index 938c5b3..f13aa42 100644 --- a/llk_lib/llk_pack_untilize.h +++ b/llk_lib/llk_pack_untilize.h @@ -40,12 +40,18 @@ inline void _llk_pack_untilize_mop_config_(const std::uint32_t face_r_dim = FACE constexpr uint MOP_OUTER_LOOP = block_ct_dim; - // Inc ch0_y+=1 (addr_mod_0 will increment by 15) - ckernel::ckernel_template tmp(MOP_OUTER_LOOP, MOP_INNER_LOOP, TT_OP_INCADCXY(p_setadc::PAC, 0, 0, 1, 0)); - tmp.set_start_op(TT_OP_PACR(ADDR_MOD_0, ZERO_OUTPUT_FLAG, PACK_SEL(PACKCNT), 0, MEGAROW, 0, 0)); - tmp.set_end_ops(TT_OP_PACR(ADDR_MOD_1, ZERO_OUTPUT_FLAG, PACK_SEL(PACKCNT), 0, MEGAROW, 0, 0), + if (num_faces>1) { + // Inc ch0_y+=1 (addr_mod_0 will increment by 15) + ckernel::ckernel_template tmp(MOP_OUTER_LOOP, MOP_INNER_LOOP, TT_OP_INCADCXY(p_setadc::PAC, 0, 0, 1, 0)); + tmp.set_start_op(TT_OP_PACR(ADDR_MOD_0, ZERO_OUTPUT_FLAG, PACK_SEL(PACKCNT), 0, MEGAROW, 0, 0)); + tmp.set_end_ops(TT_OP_PACR(ADDR_MOD_1, ZERO_OUTPUT_FLAG, PACK_SEL(PACKCNT), 0, MEGAROW, 0, 0), TT_OP_INCADCZW(p_setadc::PAC, 0, 0, 1, 0)); // w cnt points to the next tile - tmp.program(instrn_buffer); + tmp.program(instrn_buffer); + } else { + ckernel::ckernel_template tmp(MOP_OUTER_LOOP, MOP_INNER_LOOP, TT_OP_PACR(ADDR_MOD_1, ZERO_OUTPUT_FLAG, PACK_SEL(PACKCNT), 0, MEGAROW, 0, 0)); + tmp.set_end_op(TT_OP_INCADCZW(p_setadc::PAC, 0, 0, 1, 0)); // w cnt points to the next tile + tmp.program(instrn_buffer); + } } template