forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AArch64] Lower partial add reduction to udot or sdot (llvm#101010)
This patch introduces lowering of the partial add reduction intrinsic to a udot or sdot instruction for AArch64. It also adds a `shouldExpandPartialReductionIntrinsic` target hook, from which AArch64 returns false in the cases where the intrinsic can be lowered.
- Loading branch information
1 parent
df3d70b
commit 44cfbef
Showing
7 changed files
with
217 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s | ||
|
||
; Unsigned i8 -> i32 case: both <vscale x 16 x i8> inputs are zero-extended,
; multiplied, and partially reduced into the <vscale x 4 x i32> accumulator.
; The expected output is a single udot on .b sources with a .s accumulator.
define <vscale x 4 x i32> @dotp(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: dotp:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    udot z0.s, z1.b, z2.b
; CHECK-NEXT:    ret
entry:
  ; Widen both operands to the accumulator element type.
  %lhs.ext = zext <vscale x 16 x i8> %a to <vscale x 16 x i32>
  %rhs.ext = zext <vscale x 16 x i8> %b to <vscale x 16 x i32>
  %prod = mul nuw nsw <vscale x 16 x i32> %lhs.ext, %rhs.ext
  ; Fold the 16-lane product into the 4-lane accumulator.
  %sum = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %prod)
  ret <vscale x 4 x i32> %sum
}
|
||
; Unsigned i16 -> i64 case: both <vscale x 8 x i16> inputs are zero-extended,
; multiplied, and partially reduced into the <vscale x 2 x i64> accumulator.
; The expected output is a single udot on .h sources with a .d accumulator.
define <vscale x 2 x i64> @dotp_wide(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: dotp_wide:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    udot z0.d, z1.h, z2.h
; CHECK-NEXT:    ret
entry:
  ; Widen both operands to the accumulator element type.
  %lhs.ext = zext <vscale x 8 x i16> %a to <vscale x 8 x i64>
  %rhs.ext = zext <vscale x 8 x i16> %b to <vscale x 8 x i64>
  %prod = mul nuw nsw <vscale x 8 x i64> %lhs.ext, %rhs.ext
  ; Fold the 8-lane product into the 2-lane accumulator.
  %sum = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> %acc, <vscale x 8 x i64> %prod)
  ret <vscale x 2 x i64> %sum
}
|
||
; Signed i8 -> i32 case: both <vscale x 16 x i8> inputs are sign-extended,
; multiplied, and partially reduced into the <vscale x 4 x i32> accumulator.
; The expected output is a single sdot on .b sources with a .s accumulator.
;
; NOTE(review): the multiply carries `nuw` even though its operands are
; sign-extended, so the product is poison whenever it wraps as an unsigned
; value. The flag is kept as in the original test input; confirm whether it
; is intended.
define <vscale x 4 x i32> @dotp_sext(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: dotp_sext:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sdot z0.s, z1.b, z2.b
; CHECK-NEXT:    ret
entry:
  ; Widen both operands (signed) to the accumulator element type.
  %a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>
  %b.wide = sext <vscale x 16 x i8> %b to <vscale x 16 x i32>
  %mult = mul nuw nsw <vscale x 16 x i32> %a.wide, %b.wide
  ; Fold the 16-lane product into the 4-lane accumulator.
  %partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %mult)
  ret <vscale x 4 x i32> %partial.reduce
}
|
||
; Signed i16 -> i64 case: both <vscale x 8 x i16> inputs are sign-extended,
; multiplied, and partially reduced into the <vscale x 2 x i64> accumulator.
; The expected output is a single sdot on .h sources with a .d accumulator.
;
; NOTE(review): the multiply carries `nuw` even though its operands are
; sign-extended; the flag is reproduced unchanged here. Confirm whether it
; is intended.
define <vscale x 2 x i64> @dotp_wide_sext(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: dotp_wide_sext:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sdot z0.d, z1.h, z2.h
; CHECK-NEXT:    ret
entry:
  ; Widen both operands (signed) to the accumulator element type.
  %lhs.ext = sext <vscale x 8 x i16> %a to <vscale x 8 x i64>
  %rhs.ext = sext <vscale x 8 x i16> %b to <vscale x 8 x i64>
  %prod = mul nuw nsw <vscale x 8 x i64> %lhs.ext, %rhs.ext
  ; Fold the 8-lane product into the 2-lane accumulator.
  %sum = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> %acc, <vscale x 8 x i64> %prod)
  ret <vscale x 2 x i64> %sum
}
|
||
; Negative test: the inputs are <vscale x 8 x i8>, so the widened product has
; only twice as many lanes as the <vscale x 4 x i32> accumulator (8 vs 4),
; unlike the 4:1 ratio in @dotp. The expected output contains no dot
; instruction: the operands are masked to 8 bits, unpacked into low and high
; halves, and accumulated with two mla instructions.
define <vscale x 4 x i32> @not_dotp(<vscale x 4 x i32> %acc, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
; CHECK-LABEL: not_dotp:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    and z1.h, z1.h, #0xff
; CHECK-NEXT:    and z2.h, z2.h, #0xff
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uunpklo z3.s, z1.h
; CHECK-NEXT:    uunpklo z4.s, z2.h
; CHECK-NEXT:    uunpkhi z1.s, z1.h
; CHECK-NEXT:    uunpkhi z2.s, z2.h
; CHECK-NEXT:    mla z0.s, p0/m, z3.s, z4.s
; CHECK-NEXT:    mla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
entry:
  %a.wide = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
  %b.wide = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
  %mult = mul nuw nsw <vscale x 8 x i32> %a.wide, %b.wide
  ; The intrinsic suffix is mangled from the actual operand type
  ; (<vscale x 8 x i32> -> nxv8i32).
  %partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv8i32(<vscale x 4 x i32> %acc, <vscale x 8 x i32> %mult)
  ret <vscale x 4 x i32> %partial.reduce
}
|
||
; Negative test, wide variant: the inputs are <vscale x 4 x i16>, so the
; widened product has only twice as many lanes as the <vscale x 2 x i64>
; accumulator (4 vs 2), unlike the 4:1 ratio in @dotp_wide. The expected
; output contains no dot instruction: the operands are masked to 16 bits,
; unpacked into low and high halves, and accumulated with two mla
; instructions.
define <vscale x 2 x i64> @not_dotp_wide(<vscale x 2 x i64> %acc, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
; CHECK-LABEL: not_dotp_wide:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    and z1.s, z1.s, #0xffff
; CHECK-NEXT:    and z2.s, z2.s, #0xffff
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpklo z3.d, z1.s
; CHECK-NEXT:    uunpklo z4.d, z2.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    uunpkhi z2.d, z2.s
; CHECK-NEXT:    mla z0.d, p0/m, z3.d, z4.d
; CHECK-NEXT:    mla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
entry:
  ; Widen both operands to the accumulator element type.
  %lhs.ext = zext <vscale x 4 x i16> %a to <vscale x 4 x i64>
  %rhs.ext = zext <vscale x 4 x i16> %b to <vscale x 4 x i64>
  %prod = mul nuw nsw <vscale x 4 x i64> %lhs.ext, %rhs.ext
  ; Fold the 4-lane product into the 2-lane accumulator.
  %sum = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv4i64(<vscale x 2 x i64> %acc, <vscale x 4 x i64> %prod)
  ret <vscale x 2 x i64> %sum
}