Skip to content

Commit

Permalink
unpack_A whole tile
Browse files Browse the repository at this point in the history
  • Loading branch information
rdjogoTT committed Mar 28, 2024
1 parent 3013caa commit 4f289f6
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 13 deletions.
4 changes: 3 additions & 1 deletion common/inc/cunpack_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,9 @@ namespace ckernel::unpacker

for (uint i=0; i<CONFIG_SIZE; i++) cfg[THCON_SEC1_REG2_Out_data_format_ADDR32+i]=config.val[i];

uint unpA_x_end = (unpA_face_r_dim == 0) ? 1 : (unpA_face_r_dim << 4) - 1;
uint unpA_x_end = (unpA_face_r_dim == 0) ? 1 :
(unpA_num_faces == 4 && !transpose_xy_srca_en && unpA_face_r_dim == FACE_R_DIM) ? (unpA_face_r_dim << 6) - 1 :
(unpA_face_r_dim << 4) - 1;
TTI_SETADCXX(p_setadc::UNP_A, unpA_x_end, 0x0);
TTI_SETADCXX(p_setadc::UNP_B, (unpB_face_r_dim << 4)-1, 0x0);

Expand Down
24 changes: 18 additions & 6 deletions llk_lib/llk_math_eltwise_unary_datacopy.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,15 @@ inline void eltwise_unary_configure_mop(uint rows_per_inst, uint total_rows, con
tmp.set_end_op(TT_OP_SETRWC(p_setrwc::CLR_AB, 0, 0, 0, 0, p_setrwc::SET_AB));
tmp.program(instrn_buffer);
} else {
ckernel_template tmp(outerloop, innerloop, TT_OP_MOVA2D(0, 0, ADDR_MOD_2, p_mova2d::MOV_8_ROWS, 0));
tmp.set_end_op(TT_OP_SETRWC(p_setrwc::CLR_AB, 0, 0, 0, 0, p_setrwc::SET_AB));
tmp.program(instrn_buffer);
if (total_rows == 16 && num_faces == 4) {
ckernel_template tmp(1, outerloop*innerloop, TT_OP_MOVA2D(0, 0, ADDR_MOD_2, p_mova2d::MOV_8_ROWS, 0));
tmp.set_end_op(TT_OP_SETRWC(p_setrwc::CLR_AB, 0, 0, 0, 0, p_setrwc::SET_AB));
tmp.program(instrn_buffer);
} else {
ckernel_template tmp(outerloop, innerloop, TT_OP_MOVA2D(0, 0, ADDR_MOD_2, p_mova2d::MOV_8_ROWS, 0));
tmp.set_end_op(TT_OP_SETRWC(p_setrwc::CLR_AB, 0, 0, 0, 0, p_setrwc::SET_AB));
tmp.program(instrn_buffer);
}
}

} else if constexpr (type == B2D) {
Expand Down Expand Up @@ -168,9 +174,15 @@ inline void eltwise_unary_configure_mop(uint rows_per_inst, uint total_rows, con
tmp.set_end_op(TT_OP_SETRWC(p_setrwc::CLR_B, p_setrwc::CR_B, 0, 0, 0, p_setrwc::SET_B));
tmp.program(instrn_buffer);
} else {
ckernel_template tmp(outerloop, innerloop, TT_OP_MOVB2D(0, 0, addr_mod, rows_per_inst, 0));
tmp.set_end_op(TT_OP_SETRWC(p_setrwc::CLR_B, p_setrwc::CR_B, 0, 0, 0, p_setrwc::SET_B));
tmp.program(instrn_buffer);
if (total_rows == 16 && num_faces == 4) {
ckernel_template tmp(1, outerloop*innerloop, TT_OP_MOVB2D(0, 0, addr_mod, rows_per_inst, 0));
tmp.set_end_op(TT_OP_SETRWC(p_setrwc::CLR_B, p_setrwc::CR_B, 0, 0, 0, p_setrwc::SET_B));
tmp.program(instrn_buffer);
} else {
ckernel_template tmp(outerloop, innerloop, TT_OP_MOVB2D(0, 0, addr_mod, rows_per_inst, 0));
tmp.set_end_op(TT_OP_SETRWC(p_setrwc::CLR_B, p_setrwc::CR_B, 0, 0, 0, p_setrwc::SET_B));
tmp.program(instrn_buffer);
}
}
}
}
Expand Down
27 changes: 21 additions & 6 deletions llk_lib/llk_unpack_A.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,19 @@ inline void _llk_unpack_A_mop_config_(const bool transpose_of_faces, const std::
}
tmp.program(instrn_buffer);
} else {
const uint32_t outerloop = num_faces;
constexpr uint32_t innerloop = 1;
ckernel_template tmp(outerloop, innerloop, unpack_srcb_zerosrc, unpack_srcb_set_dvalid);
tmp.set_start_op(unpack_srca);
tmp.program(instrn_buffer);
if (num_faces == 4) {
const uint32_t outerloop = 1;
constexpr uint32_t innerloop = 1;
ckernel_template tmp(outerloop, innerloop, unpack_srcb_zerosrc, unpack_srcb_set_dvalid);
tmp.set_start_op(unpack_srca);
tmp.program(instrn_buffer);
} else {
const uint32_t outerloop = num_faces;
constexpr uint32_t innerloop = 1;
ckernel_template tmp(outerloop, innerloop, unpack_srcb_zerosrc, unpack_srcb_set_dvalid);
tmp.set_start_op(unpack_srca);
tmp.program(instrn_buffer);
}
}
}
}
Expand All @@ -158,7 +166,14 @@ inline void _llk_unpack_A_hw_configure_(const std::uint32_t unpack_src_format, c
/**
 * Initializes the unpack-A path: programs the x-end counter of the selected
 * unpacker and then configures the unpack MOP.
 *
 * Template parameters:
 *   BType             - BroadcastType::NONE selects p_setadc::UNP_A; any other
 *                       value selects p_setadc::UNP_B.
 *   acc_to_dest       - when true, the whole-tile x-end is never used.
 *   binary_reuse_dest - forwarded to _llk_unpack_A_mop_config_.
 *   unpack_to_dest    - forwarded to _llk_unpack_A_mop_config_.
 *
 * Parameters:
 *   transpose_of_faces          - non-zero disables the whole-tile x-end; forwarded
 *                                 to the MOP config as (transpose_of_faces > 0).
 *   within_face_16x16_transpose - non-zero disables the whole-tile x-end.
 *   face_r_dim                  - face row dimension (defaults to FACE_R_DIM).
 *   num_faces                   - number of faces (defaults to 4).
 *   unpack_src_format           - forwarded to _llk_unpack_A_mop_config_.
 *   unpack_dst_format           - forwarded to _llk_unpack_A_mop_config_.
 */
template <BroadcastType BType = BroadcastType::NONE, bool acc_to_dest = false, EltwiseBinaryReuseDestType binary_reuse_dest = EltwiseBinaryReuseDestType::NONE, bool unpack_to_dest = false>
inline void _llk_unpack_A_init_(const std::uint32_t transpose_of_faces=0, const std::uint32_t within_face_16x16_transpose=0, const std::uint32_t face_r_dim = FACE_R_DIM, const std::uint32_t num_faces = 4, const std::uint32_t unpack_src_format = 0, const std::uint32_t unpack_dst_format = 0) {
    constexpr std::uint32_t UNP_SEL = (BType == BroadcastType::NONE) ? p_setadc::UNP_A : p_setadc::UNP_B;

    // Exactly one x-end configuration is issued per call: either the whole-tile
    // value or the per-face value chosen by config_unpacker_x_end.
    // NOTE(review): assumes config_unpacker_x_end only programs x-end for UNP_SEL
    // (no other side effects) - confirm against its definition.
    const bool whole_tile = (num_faces == 4) && !transpose_of_faces && !within_face_16x16_transpose && !acc_to_dest;

    if (whole_tile) {
        // x-end spans all faces at once: num_faces * face_r_dim * FACE_C_DIM elements, minus one.
        const std::uint32_t unpSEL_x_end = num_faces*face_r_dim*FACE_C_DIM-1;
        TT_SETADCXX(UNP_SEL, unpSEL_x_end, 0x0);
    } else {
        config_unpacker_x_end<UNP_SEL>(face_r_dim);
    }

    // NOTE(review): the whole-tile condition above presumably has to agree with the
    // loop shape chosen inside _llk_unpack_A_mop_config_ - verify both stay in sync.
    _llk_unpack_A_mop_config_<BType, acc_to_dest, binary_reuse_dest, unpack_to_dest>(transpose_of_faces>0, num_faces, unpack_src_format, unpack_dst_format);
}

Expand Down

0 comments on commit 4f289f6

Please sign in to comment.