Skip to content

Commit

Permalink
[AMDGPU] Prevent hang in SIFoldOperands by caching uses (llvm#82099)
Browse files Browse the repository at this point in the history
foldOperand() for REG_SEQUENCE has recursion that can trigger an infinite loop,
as the method can modify the operand order, which messes up the range-based
for loop. This patch fixes the issue by caching the uses for processing beforehand,
and then iterating over the cache rather than using the instruction iterator.

Change-Id: Iac081f4e363984cfd9917672e7d93107c51c97ac
  • Loading branch information
choikwa authored and zhang2amd committed Mar 7, 2024
1 parent 885eaf5 commit fcd81b4
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 6 deletions.
15 changes: 9 additions & 6 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -601,20 +601,23 @@ void SIFoldOperands::foldOperand(
Register RegSeqDstReg = UseMI->getOperand(0).getReg();
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
MachineInstr *RSUseMI = RSUse.getParent();
// Grab the use operands first
SmallVector<MachineOperand *, 4> UsesToProcess;
for (auto &Use : MRI->use_nodbg_operands(RegSeqDstReg))
UsesToProcess.push_back(&Use);
for (auto *RSUse : UsesToProcess) {
MachineInstr *RSUseMI = RSUse->getParent();

if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI,
RSUseMI->getOperandNo(&RSUse), FoldList))
RSUseMI->getOperandNo(RSUse), FoldList))
continue;

if (RSUse.getSubReg() != RegSeqDstSubReg)
if (RSUse->getSubReg() != RegSeqDstSubReg)
continue;

foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,
foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,
CopiesToReplace);
}

return;
}

Expand Down
18 changes: 18 additions & 0 deletions llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s
#
# Regression test for a hang in si-fold-operands when folding through a
# REG_SEQUENCE. The RUN line is not piped to FileCheck on purpose: the test
# only requires that llc finishes and that the result passes
# -verify-machineinstrs.
#
# The REG_SEQUENCE result %2 is built from two S_MOV_B32 immediates and is
# read twice through %2.sub0 (by %3 and by %5), so the fold has more than one
# use of the REG_SEQUENCE destination to process.

---
name: fold_reg_sequence
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2
%0:sreg_32 = S_MOV_B32 0
%1:sreg_32 = S_MOV_B32 429
%2:sreg_64 = REG_SEQUENCE killed %1, %subreg.sub0, %0, %subreg.sub1
%3:vgpr_32 = V_MUL_HI_U32_e64 $vgpr2, %2.sub0, implicit $exec
%4:vgpr_32 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1)
%5:vgpr_32 = V_MUL_HI_U32_e64 %4, %2.sub0, implicit $exec
S_ENDPGM 0
...

0 comments on commit fcd81b4

Please sign in to comment.