Skip to content

Commit

Permalink
Fix lowering logic for low-precision types in XeTileToXeGPU (#817)
Browse files Browse the repository at this point in the history
  • Loading branch information
charithaintc authored Jul 30, 2024
1 parent a4d993d commit 4e86e50
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,11 @@ struct SgLoadTileOpPattern : public XeOneToNConversion<xetile::LoadTileOp> {
// TODO: move these two into the architecture abstraction in the future.
const int SIMD_WIDTH_IN_BITS = 32;
int factor = SIMD_WIDTH_IN_BITS / elemTy.getIntOrFloatBitWidth();
// TODO: use uArch for this?
// Returns true when `width` is a power of two strictly between 1 and 32,
// i.e. exactly one of 2, 4, 8 or 16.
auto isLowPrecision = [](unsigned int width) -> bool {
  // A power of two has a single bit set, so clearing its lowest set bit
  // leaves zero. Zero itself also satisfies this test; it is rejected by the
  // `width > 1` range check below.
  const bool isPowerOf2 = (width & (width - 1)) == 0;
  // Logical && (not bitwise &) is the idiomatic way to combine bool
  // conditions; the result is the same for every input.
  return isPowerOf2 && width > 1 && width < 32;
};
if (isForDPASB(op) && factor > 1)
vnniAttr = mlir::UnitAttr::get(ctx);

Expand All @@ -621,7 +626,7 @@ struct SgLoadTileOpPattern : public XeOneToNConversion<xetile::LoadTileOp> {
auto elemWidth = elemTy.getIntOrFloatBitWidth();
if (elemWidth == 32) {
transposeAttr = rewriter.getDenseI64ArrayAttr({1, 0});
} else if (elemWidth == 16 && vnniAttr) {
} else if (isLowPrecision(elemWidth) && vnniAttr) {
transposeAttr = rewriter.getDenseI64ArrayAttr({1, 0});
transposeBitWidthAttr = rewriter.getI32IntegerAttr(32);
vnniAttr = nullptr;
Expand Down

0 comments on commit 4e86e50

Please sign in to comment.