Skip to content

Commit

Permalink
[AMDGPU] ds_read_*/ds_write_* operations require strict alignment.
Browse files Browse the repository at this point in the history
For performance reasons, ds_read_*/ds_write_* operations require
strict alignment. Avoid selecting them in under-aligned situations
irrespective of whether "unaligned access mode" is enabled or not.

Change-Id: Ibe648cf663eb80365cff0e456e69a813c7e55aa2
  • Loading branch information
mshivama committed Mar 29, 2021
1 parent 0c641f1 commit 8bdba68
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 67 deletions.
52 changes: 10 additions & 42 deletions llvm/lib/Target/AMDGPU/DSInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -674,38 +674,6 @@ defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;

let AddedComplexity = 100 in {

foreach vt = VReg_64.RegTypes in {
defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">;
}

let SubtargetPredicate = isGFX7Plus in {

foreach vt = VReg_96.RegTypes in {
defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">;
}

foreach vt = VReg_128.RegTypes in {
defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">;
}

let SubtargetPredicate = HasUnalignedAccessMode in {

foreach vt = VReg_96.RegTypes in {
defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">;
}

foreach vt = VReg_128.RegTypes in {
defm : DSReadPat_mc <DS_READ_B128, vt, "load_local">;
}

} // End SubtargetPredicate = HasUnalignedAccessMode

} // End SubtargetPredicate = isGFX7Plus

} // End AddedComplexity = 100

let OtherPredicates = [D16PreservesUnusedBits] in {
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>;
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>;
Expand Down Expand Up @@ -831,31 +799,31 @@ defm : DS128Bit8ByteAlignedPat_mc<vt>;

let AddedComplexity = 100 in {

foreach vt = VReg_64.RegTypes in {
defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">;
}

foreach vt = VReg_64.RegTypes in {
defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align8_local">;
}

let SubtargetPredicate = isGFX7Plus in {

foreach vt = VReg_96.RegTypes in {
defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_align16_local">;
}

foreach vt = VReg_128.RegTypes in {
defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align16_local">;
defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">;
}

let SubtargetPredicate = HasUnalignedAccessMode in {

foreach vt = VReg_96.RegTypes in {
defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_local">;
defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_align16_local">;
}

foreach vt = VReg_128.RegTypes in {
defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_local">;
defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">;
}

} // End SubtargetPredicate = HasUnalignedAccessMode
foreach vt = VReg_128.RegTypes in {
defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align16_local">;
}

} // End SubtargetPredicate = isGFX7Plus

Expand Down
40 changes: 15 additions & 25 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1431,42 +1431,32 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(

if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// Check if alignment requirements for ds_read/write instructions are
// disabled.
if (Subtarget->hasUnalignedDSAccessEnabled() &&
!Subtarget->hasLDSMisalignedBug()) {
if (IsFast)
*IsFast = Alignment != Align(2);
return true;
}

if (Size == 64) {
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
// aligned, 8 byte access in a single operation using ds_read2/write2_b32
// with adjacent offsets.
bool AlignedBy4 = Alignment >= Align(4);
// ds_read/write_b64 always require 8-byte alignment for performance
// reasons.
bool AlignedBy8 = Alignment >= Align(8);
if (IsFast)
*IsFast = AlignedBy4;
*IsFast = AlignedBy8;

return AlignedBy4;
return AlignedBy8;
}
if (Size == 96) {
// ds_read/write_b96 require 16-byte alignment on gfx8 and older.
bool Aligned = Alignment >= Align(16);
// ds_read/write_b96 always require 16-byte alignment for performance
// reasons.
bool AlignedBy16 = Alignment >= Align(16);
if (IsFast)
*IsFast = Aligned;
*IsFast = AlignedBy16;

return Aligned;
return AlignedBy16;
}
if (Size == 128) {
// ds_read/write_b128 require 16-byte alignment on gfx8 and older, but we
// can do a 8 byte aligned, 16 byte access in a single operation using
// ds_read2/write2_b64.
bool Aligned = Alignment >= Align(8);
// ds_read/write_b128 always require 16-byte alignment for performance
// reasons.
bool AlignedBy16 = Alignment >= Align(16);
if (IsFast)
*IsFast = Aligned;
*IsFast = AlignedBy16;

return Aligned;
return AlignedBy16;
}
}

Expand Down

0 comments on commit 8bdba68

Please sign in to comment.