From fcd81b4e4b038497d14a805114beb00cbd5f43e8 Mon Sep 17 00:00:00 2001 From: choikwa <5455710+choikwa@users.noreply.github.com> Date: Tue, 27 Feb 2024 10:13:59 -0500 Subject: [PATCH] [AMDGPU] Prevent hang in SIFoldOperands by caching uses (#82099) foldOperand() for REG_SEQUENCE has recursion that can trigger an infinite loop as the method can modify the operand order, which messes up the range-based for loop. This patch fixes the issue by caching the uses for processing beforehand, and then iterating over the cache rather than using the instruction iterator. Change-Id: Iac081f4e363984cfd9917672e7d93107c51c97ac --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 15 +++++++++------ .../CodeGen/AMDGPU/si-fold-reg-sequence.mir | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 9f1d6038f1b6d7..3b98b8989d2e78 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -601,20 +601,23 @@ void SIFoldOperands::foldOperand( Register RegSeqDstReg = UseMI->getOperand(0).getReg(); unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm(); - for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) { - MachineInstr *RSUseMI = RSUse.getParent(); + // Grab the use operands first + SmallVector<MachineOperand *> UsesToProcess; + for (auto &Use : MRI->use_nodbg_operands(RegSeqDstReg)) + UsesToProcess.push_back(&Use); + for (auto *RSUse : UsesToProcess) { + MachineInstr *RSUseMI = RSUse->getParent(); if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI, - RSUseMI->getOperandNo(&RSUse), FoldList)) + RSUseMI->getOperandNo(RSUse), FoldList)) continue; - if (RSUse.getSubReg() != RegSeqDstSubReg) + if (RSUse->getSubReg() != RegSeqDstSubReg) continue; - foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList, + foldOperand(OpToFold, 
RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList, CopiesToReplace); } - return; } diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir new file mode 100644 index 00000000000000..7852f5d0c96f55 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir @@ -0,0 +1,18 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s + +--- +name: fold_reg_sequence +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:sreg_32 = S_MOV_B32 0 + %1:sreg_32 = S_MOV_B32 429 + %2:sreg_64 = REG_SEQUENCE killed %1, %subreg.sub0, %0, %subreg.sub1 + %3:vgpr_32 = V_MUL_HI_U32_e64 $vgpr2, %2.sub0, implicit $exec + %4:vgpr_32 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1) + %5:vgpr_32 = V_MUL_HI_U32_e64 %4, %2.sub0, implicit $exec + S_ENDPGM 0 + +... +