[Snippets][CPU] Moved N_tail processing to the end in BrgemmCopyBKernel #28664

Open · wants to merge 2 commits into master

```diff
@@ -226,21 +226,13 @@ void BrgemmCopyBKernel::generate() {
     size_t start_out = 0;
     size_t start_comp = 0;
 
-    auto add_ptr_increments = [&](size_t current_N) {
+    for (size_t nb = 0; nb < div_up(N_blk, wei_N_blk); nb++) {
+        const auto current_N = N_blk - nb * wei_N_blk < wei_N_blk ? wei_N_tail : wei_N_blk;
+        emit_brgemm_copy_b_kernel_call(current_N, K, start_in, start_out, start_comp);
+
         start_in += is_transpose ? K * current_N * wei_data_size : current_N * wei_data_size;
         start_out += current_N * vnni_factor * wei_data_size;
         start_comp += is_with_comp ? current_N * sizeof(int32_t) : 0;
-    };
-
-    // OneDNN requires tail handling before main iterations
-    if (wei_N_tail != 0) {
-        emit_brgemm_copy_b_kernel_call(wei_N_tail, K, start_in, start_out, start_comp);
-        add_ptr_increments(wei_N_tail);
-    }
-
-    for (auto nb = wei_N_tail; nb < N_blk; nb += wei_N_blk) {
-        emit_brgemm_copy_b_kernel_call(wei_N_blk, K, start_in, start_out, start_comp);
-        add_ptr_increments(wei_N_blk);
     }
 
     postamble();
```
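
With the separate tail call folded into the main loop, the tail block is now emitted by the last loop iteration (where fewer than `wei_N_blk` columns remain) instead of up front, which is what the PR title refers to. Below is a minimal standalone sketch of the new iteration pattern; the sizes are hypothetical and `div_up` is a local stand-in for the snippets helper, not the real emitter code:

```cpp
#include <cstddef>
#include <cstdio>

// Local stand-in for the div_up helper used by the kernel.
static size_t div_up(size_t a, size_t b) {
    return (a + b - 1) / b;
}

int main() {
    const size_t N_blk = 100;                     // assumed total N to repack
    const size_t wei_N_blk = 32;                  // assumed inner N block size
    const size_t wei_N_tail = N_blk % wei_N_blk;  // 4

    for (size_t nb = 0; nb < div_up(N_blk, wei_N_blk); nb++) {
        // Same selection as in the diff: a full block unless fewer than
        // wei_N_blk columns remain, in which case the tail size is used.
        const size_t current_N = N_blk - nb * wei_N_blk < wei_N_blk ? wei_N_tail : wei_N_blk;
        std::printf("iteration %zu: copy %zu columns\n", nb, current_N);
    }
    return 0;
}
```

For `N_blk = 100` and `wei_N_blk = 32` this prints three full 32-column iterations followed by the 4-column tail, so the pointer increments no longer need a dedicated tail branch.
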
```diff
@@ -60,6 +60,14 @@ namespace repacking {
 size_t compute_inner_n_block(const ov::element::Type& precision);
 /// \brief Computes inner K block size used by OneDNN implementation. Depends on tensor precision
 size_t compute_inner_k_block(const ov::element::Type& precision);
+
+/// \brief Computes N dim in output blocked shape of BrgemmCopyB. Depends on tensor precision
+template <
+    typename T,
+    typename = typename std::enable_if<(std::is_same<T, size_t>::value || std::is_same<T, int64_t>::value), bool>::type>
+inline T compute_repacked_n_dim(T n, const ov::element::Type& precision) {
+    return ov::snippets::utils::rnd_up(n, static_cast<T>(compute_inner_n_block(precision)));
+}
 /**
  * @brief Computes leading dimension (LDB) which must be used in brgemm and brgemm_copy_b emitters
  * @param n_block N block size shared between BrgemmCPU and BrgemmCopyB node
@@ -68,10 +76,8 @@ size_t compute_inner_k_block(const ov::element::Type& precision);
 template <
     typename T,
     typename = typename std::enable_if<(std::is_same<T, size_t>::value || std::is_same<T, int64_t>::value), bool>::type>
-T compute_LDB(T n_block, const ov::element::Type& precision) {
-    return snippets::utils::is_dynamic_value<T>(n_block)
-               ? n_block
-               : std::max(n_block, static_cast<T>(compute_inner_n_block(precision)));
+inline T compute_LDB(T n_block, const ov::element::Type& precision) {
+    return compute_repacked_n_dim(n_block, precision);
 }
 /**
  * @brief Retrieves the expression pointer for the brgemm_copy_b expression corresponding to the given BrgemmCPU
```
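
`compute_LDB` now delegates to the new `compute_repacked_n_dim`, which rounds `n_block` up to a multiple of the inner N block rather than only clamping it from below with `std::max`. Judging from the diff alone, the two differ for values that exceed the inner block but are not multiples of it (in practice callers may only pass block sizes where both agree). A small self-contained comparison; the inner block value 64 is an assumption and `rnd_up` is a local model of `ov::snippets::utils::rnd_up`:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

// Local model of "round value up to a multiple of step".
template <typename T>
T rnd_up(T value, T step) {
    return (value + step - 1) / step * step;
}

int main() {
    const int64_t inner_n_block = 64;  // assumed compute_inner_n_block() result

    // n below the inner block: both approaches agree.
    assert(std::max<int64_t>(48, inner_n_block) == 64);
    assert(rnd_up<int64_t>(48, inner_n_block) == 64);

    // n above the inner block but not a multiple of it:
    // std::max keeps 100, rnd_up pads it to the next multiple, 128.
    assert(std::max<int64_t>(100, inner_n_block) == 100);
    assert(rnd_up<int64_t>(100, inner_n_block) == 128);
    return 0;
}
```

The old `compute_LDB` also special-cased dynamic values explicitly; the new one relies on the helper to handle them, which the next file's change depends on as well.
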
```diff
@@ -45,7 +45,7 @@ void RepackedWeightsBufferExpression::init_allocation_size(
 
     const auto& precision = get_node()->get_input_element_type(0);
     // Repacking buffer shape is set in accordance to OneDNN requirements
-    const size_t N_dim = std::max(n_blk, compute_inner_n_block(precision));
+    const size_t N_dim = compute_repacked_n_dim(n_blk, precision);
     if (!in_layout.empty() && in_layout.back() != in_layout.size() - 1) {
         // In case of transpose, K dimension must be rounded-up to number of elems in vector register
         // For the details, please see 'transpose16x8' and 'fixup16x16' implementations and usage in
@@ -88,13 +88,9 @@ void CompensationsBufferExpression::init_allocation_size(
     // Compensations are computed during repacking, so we need to round-up allocation shape according to m_inner_n_block
     // because of OneDNN implementation nuances (as in get_repacking_buffer_size).
     // However, the compensations are computed by N dimension, so K dimension doesn't affect the compensations buffer
+    const auto& precision = parent_expr->get_node()->get_input_element_type(0);
     const size_t n_blk = *ov::snippets::utils::get_projected_subtensor(parent_expr->get_input_port(0)).rbegin();
-    if (snippets::utils::is_dynamic_value(n_blk)) {
-        m_allocation_size = snippets::utils::get_dynamic_value<size_t>();
-    } else {
-        const auto& precision = parent_expr->get_node()->get_input_element_type(0);
-        m_allocation_size = std::max(n_blk, compute_inner_n_block(precision));
-    }
+    m_allocation_size = compute_repacked_n_dim(n_blk, precision);
 }
 
 } // namespace intel_cpu
```
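
Dropping the explicit `is_dynamic_value` branch here is only safe if `compute_repacked_n_dim`, i.e. `rnd_up`, passes a dynamic dimension through unchanged. The sketch below models that assumed contract with a hypothetical sentinel value; it is not the actual `ov::snippets::utils` implementation:

```cpp
#include <cassert>
#include <cstddef>
#include <limits>

// Hypothetical sentinel marking a dynamic (unknown until runtime) dimension.
constexpr size_t kDynamic = std::numeric_limits<size_t>::max();

bool is_dynamic_value(size_t v) {
    return v == kDynamic;
}

// Assumed contract: a dynamic value is returned unchanged, so callers
// such as init_allocation_size() no longer need to special-case it.
size_t rnd_up(size_t value, size_t step) {
    if (is_dynamic_value(value)) {
        return value;
    }
    return (value + step - 1) / step * step;
}

int main() {
    assert(rnd_up(48, 64) == 64);              // static dim: rounded up
    assert(rnd_up(kDynamic, 64) == kDynamic);  // dynamic dim: passed through
    return 0;
}
```
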
```diff
@@ -57,7 +57,7 @@ VectorDims BrgemmExternalRepackingAdjuster::get_blk_shape(const VectorDims& plan
     const auto K = *++planar_shape.rbegin();
     const auto N = *planar_shape.rbegin();
     const auto new_K = snippets::utils::div_up(K, vnni_factor);
-    const auto new_N = std::max(N, brgemm_utils::repacking::compute_inner_n_block(prc));
+    const auto new_N = brgemm_utils::repacking::compute_repacked_n_dim(N, prc);
     VectorDims blk_shape(planar_shape.begin(), planar_shape.end() - brgemm_kernel_rank);
     blk_shape.insert(blk_shape.end(), {new_K, new_N, vnni_factor});
     return blk_shape;
```
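
In `get_blk_shape`, the same helper now also sizes the externally repacked weights: the planar `[..., K, N]` tail is replaced by `[ceil(K / vnni_factor), repacked_N, vnni_factor]`. A hypothetical numeric walk-through; the vnni factor 2, inner N block 64, kernel rank 2, and shape values are all assumptions for illustration:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

size_t div_up(size_t a, size_t b) { return (a + b - 1) / b; }
size_t rnd_up(size_t a, size_t b) { return div_up(a, b) * b; }

int main() {
    const std::vector<size_t> planar_shape{1, 8, 384, 100};  // [..., K = 384, N = 100]
    const size_t vnni_factor = 2;                            // e.g. bf16-style repacking
    const size_t inner_n_block = 64;
    const size_t brgemm_kernel_rank = 2;                     // last two dims feed the kernel

    const size_t K = *++planar_shape.rbegin();
    const size_t N = *planar_shape.rbegin();
    const size_t new_K = div_up(K, vnni_factor);    // 192
    const size_t new_N = rnd_up(N, inner_n_block);  // 128 (std::max would have kept 100)

    std::vector<size_t> blk_shape(planar_shape.begin(), planar_shape.end() - brgemm_kernel_rank);
    blk_shape.insert(blk_shape.end(), {new_K, new_N, vnni_factor});

    assert((blk_shape == std::vector<size_t>{1, 8, 192, 128, 2}));
    return 0;
}
```
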