diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 0ce762ee3c50cc..5e81ad381d3d0d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -611,3 +611,46 @@ bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
   }
   return false;
 }
+
+/// \returns true if \p MF needs or may need AGPRs.
+///
+/// The answer is computed on the first call and cached in UsesAGPRs; later
+/// calls return the cached value without rescanning the function.
+bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
+  if (UsesAGPRs)
+    return *UsesAGPRs;
+
+  // Anything that is not an entry function, and any function that makes
+  // calls, is reported as using AGPRs without inspecting its registers.
+  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
+      MF.getFrameInfo().hasCalls()) {
+    UsesAGPRs = true;
+    return true;
+  }
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  // The register info does not depend on the vreg, so fetch it only once.
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+  // A virtual register whose register class is an AGPR class counts as a use.
+  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+    const Register Reg = Register::index2VirtReg(I);
+    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
+    if (RC && TRI->isAGPRClass(RC)) {
+      UsesAGPRs = true;
+      return true;
+    }
+  }
+
+  // So does any physical AGPR that MRI reports as used.
+  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
+    if (MRI.isPhysRegUsed(Reg)) {
+      UsesAGPRs = true;
+      return true;
+    }
+  }
+
+  UsesAGPRs = false;
+  return false;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 298f52c88abc74..baa4cb00aefd93 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -429,6 +429,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   // Current recorded maximum possible occupancy.
   unsigned Occupancy;
 
+  // Cached result of usesAGPRs(); empty until the first query.
+  mutable Optional<bool> UsesAGPRs;
+
   MCPhysReg getNextUserSGPR() const;
 
   MCPhysReg getNextSystemSGPR() const;
@@ -937,6 +940,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
     Occupancy = Limit;
     limitOccupancy(MF);
   }
+
+  /// \returns true if a function needs or may need AGPRs.
+  bool usesAGPRs(const MachineFunction &MF) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index cf4035b6b268e2..d2c85861a34ca0 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -510,18 +510,39 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     reserveRegisterTuples(Reserved, Reg);
   }
 
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
-  // TODO: In an entry function without calls and AGPRs used it is possible
-  //       to use the whole register budget for VGPRs. Even more it shall
-  //       be possible to estimate maximum AGPR/VGPR pressure and split
-  //       register file accordingly.
-  if (ST.hasGFX90AInsts())
-    MaxNumVGPRs /= 2;
+  unsigned MaxNumAGPRs = MaxNumVGPRs;
   unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+
+  if (ST.hasGFX90AInsts()) {
+    // In an entry function without calls and AGPRs used it is possible to use
+    // the whole register budget for VGPRs.
+
+    // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and
+    //       split register file accordingly.
+    if (MFI->usesAGPRs(MF)) {
+      // AGPRs may be needed: VGPRs and AGPRs each get half of the budget.
+      MaxNumVGPRs /= 2;
+      MaxNumAGPRs = MaxNumVGPRs;
+    } else if (MaxNumVGPRs > TotalNumVGPRs) {
+      // The budget exceeds the VGPR file: cap VGPRs at the file size and
+      // leave the remainder available as AGPRs.
+      MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
+      MaxNumVGPRs = TotalNumVGPRs;
+    } else {
+      // The whole budget fits in VGPRs, so every AGPR gets reserved below.
+      MaxNumAGPRs = 0;
+    }
+  }
+
   for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
     unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
     reserveRegisterTuples(Reserved, Reg);
-    Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
+  }
+
+  for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
+    unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
     reserveRegisterTuples(Reserved, Reg);
   }
 
@@ -545,8 +566,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     }
   }
 
-  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-
   Register ScratchRSrcReg = MFI->getScratchRSrcReg();
   if (ScratchRSrcReg != AMDGPU::NoRegister) {
     // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
index da32323d674452..c105d4df70dd69 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
@@ -548,6 +548,7 @@ attributes #256 = { nounwind "amdgpu-flat-work-group-size"="256,256" }
 ; GFX10CU-WAVE32: NumVgprs: 128
 ; GFX10CU-WAVE64: NumVgprs: 128
 define amdgpu_kernel void @f512() #512 {
+  call void @foo()
   call void @use256vgprs()
   ret void
 }
@@ -563,7 +564,11 @@ attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
 ; GFX10CU-WAVE32: NumVgprs: 64
 ; GFX10CU-WAVE64: NumVgprs: 64
 define amdgpu_kernel void @f1024() #1024 {
+  call void @foo()
   call void @use256vgprs()
   ret void
 }
+
 attributes #1024 = { nounwind "amdgpu-flat-work-group-size"="1024,1024" }
+
+declare void @foo()
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
b/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll new file mode 100644 index 00000000000000..19e2068510a0b6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll @@ -0,0 +1,700 @@ +; -enable-misched=false makes the register usage more predictable +; -regalloc=fast just makes the test run faster +; RUN: llc -march=amdgcn -mcpu=gfx90a -amdgpu-function-calls=false -enable-misched=false -regalloc=fast < %s | FileCheck %s --check-prefixes=GCN,GFX90A + +define internal void @use256vgprs() { + %v0 = call i32 asm sideeffect "; def $0", "=v"() + %v1 = call i32 asm sideeffect "; def $0", "=v"() + %v2 = call i32 asm sideeffect "; def $0", "=v"() + %v3 = call i32 asm sideeffect "; def $0", "=v"() + %v4 = call i32 asm sideeffect "; def $0", "=v"() + %v5 = call i32 asm sideeffect "; def $0", "=v"() + %v6 = call i32 asm sideeffect "; def $0", "=v"() + %v7 = call i32 asm sideeffect "; def $0", "=v"() + %v8 = call i32 asm sideeffect "; def $0", "=v"() + %v9 = call i32 asm sideeffect "; def $0", "=v"() + %v10 = call i32 asm sideeffect "; def $0", "=v"() + %v11 = call i32 asm sideeffect "; def $0", "=v"() + %v12 = call i32 asm sideeffect "; def $0", "=v"() + %v13 = call i32 asm sideeffect "; def $0", "=v"() + %v14 = call i32 asm sideeffect "; def $0", "=v"() + %v15 = call i32 asm sideeffect "; def $0", "=v"() + %v16 = call i32 asm sideeffect "; def $0", "=v"() + %v17 = call i32 asm sideeffect "; def $0", "=v"() + %v18 = call i32 asm sideeffect "; def $0", "=v"() + %v19 = call i32 asm sideeffect "; def $0", "=v"() + %v20 = call i32 asm sideeffect "; def $0", "=v"() + %v21 = call i32 asm sideeffect "; def $0", "=v"() + %v22 = call i32 asm sideeffect "; def $0", "=v"() + %v23 = call i32 asm sideeffect "; def $0", "=v"() + %v24 = call i32 asm sideeffect "; def $0", "=v"() + %v25 = call i32 asm sideeffect "; def $0", "=v"() + %v26 = call i32 asm sideeffect "; def $0", "=v"() + %v27 = call i32 asm sideeffect "; def $0", "=v"() + %v28 = call i32 asm sideeffect "; def 
$0", "=v"() + %v29 = call i32 asm sideeffect "; def $0", "=v"() + %v30 = call i32 asm sideeffect "; def $0", "=v"() + %v31 = call i32 asm sideeffect "; def $0", "=v"() + %v32 = call i32 asm sideeffect "; def $0", "=v"() + %v33 = call i32 asm sideeffect "; def $0", "=v"() + %v34 = call i32 asm sideeffect "; def $0", "=v"() + %v35 = call i32 asm sideeffect "; def $0", "=v"() + %v36 = call i32 asm sideeffect "; def $0", "=v"() + %v37 = call i32 asm sideeffect "; def $0", "=v"() + %v38 = call i32 asm sideeffect "; def $0", "=v"() + %v39 = call i32 asm sideeffect "; def $0", "=v"() + %v40 = call i32 asm sideeffect "; def $0", "=v"() + %v41 = call i32 asm sideeffect "; def $0", "=v"() + %v42 = call i32 asm sideeffect "; def $0", "=v"() + %v43 = call i32 asm sideeffect "; def $0", "=v"() + %v44 = call i32 asm sideeffect "; def $0", "=v"() + %v45 = call i32 asm sideeffect "; def $0", "=v"() + %v46 = call i32 asm sideeffect "; def $0", "=v"() + %v47 = call i32 asm sideeffect "; def $0", "=v"() + %v48 = call i32 asm sideeffect "; def $0", "=v"() + %v49 = call i32 asm sideeffect "; def $0", "=v"() + %v50 = call i32 asm sideeffect "; def $0", "=v"() + %v51 = call i32 asm sideeffect "; def $0", "=v"() + %v52 = call i32 asm sideeffect "; def $0", "=v"() + %v53 = call i32 asm sideeffect "; def $0", "=v"() + %v54 = call i32 asm sideeffect "; def $0", "=v"() + %v55 = call i32 asm sideeffect "; def $0", "=v"() + %v56 = call i32 asm sideeffect "; def $0", "=v"() + %v57 = call i32 asm sideeffect "; def $0", "=v"() + %v58 = call i32 asm sideeffect "; def $0", "=v"() + %v59 = call i32 asm sideeffect "; def $0", "=v"() + %v60 = call i32 asm sideeffect "; def $0", "=v"() + %v61 = call i32 asm sideeffect "; def $0", "=v"() + %v62 = call i32 asm sideeffect "; def $0", "=v"() + %v63 = call i32 asm sideeffect "; def $0", "=v"() + %v64 = call i32 asm sideeffect "; def $0", "=v"() + %v65 = call i32 asm sideeffect "; def $0", "=v"() + %v66 = call i32 asm sideeffect "; def $0", "=v"() + %v67 = 
call i32 asm sideeffect "; def $0", "=v"() + %v68 = call i32 asm sideeffect "; def $0", "=v"() + %v69 = call i32 asm sideeffect "; def $0", "=v"() + %v70 = call i32 asm sideeffect "; def $0", "=v"() + %v71 = call i32 asm sideeffect "; def $0", "=v"() + %v72 = call i32 asm sideeffect "; def $0", "=v"() + %v73 = call i32 asm sideeffect "; def $0", "=v"() + %v74 = call i32 asm sideeffect "; def $0", "=v"() + %v75 = call i32 asm sideeffect "; def $0", "=v"() + %v76 = call i32 asm sideeffect "; def $0", "=v"() + %v77 = call i32 asm sideeffect "; def $0", "=v"() + %v78 = call i32 asm sideeffect "; def $0", "=v"() + %v79 = call i32 asm sideeffect "; def $0", "=v"() + %v80 = call i32 asm sideeffect "; def $0", "=v"() + %v81 = call i32 asm sideeffect "; def $0", "=v"() + %v82 = call i32 asm sideeffect "; def $0", "=v"() + %v83 = call i32 asm sideeffect "; def $0", "=v"() + %v84 = call i32 asm sideeffect "; def $0", "=v"() + %v85 = call i32 asm sideeffect "; def $0", "=v"() + %v86 = call i32 asm sideeffect "; def $0", "=v"() + %v87 = call i32 asm sideeffect "; def $0", "=v"() + %v88 = call i32 asm sideeffect "; def $0", "=v"() + %v89 = call i32 asm sideeffect "; def $0", "=v"() + %v90 = call i32 asm sideeffect "; def $0", "=v"() + %v91 = call i32 asm sideeffect "; def $0", "=v"() + %v92 = call i32 asm sideeffect "; def $0", "=v"() + %v93 = call i32 asm sideeffect "; def $0", "=v"() + %v94 = call i32 asm sideeffect "; def $0", "=v"() + %v95 = call i32 asm sideeffect "; def $0", "=v"() + %v96 = call i32 asm sideeffect "; def $0", "=v"() + %v97 = call i32 asm sideeffect "; def $0", "=v"() + %v98 = call i32 asm sideeffect "; def $0", "=v"() + %v99 = call i32 asm sideeffect "; def $0", "=v"() + %v100 = call i32 asm sideeffect "; def $0", "=v"() + %v101 = call i32 asm sideeffect "; def $0", "=v"() + %v102 = call i32 asm sideeffect "; def $0", "=v"() + %v103 = call i32 asm sideeffect "; def $0", "=v"() + %v104 = call i32 asm sideeffect "; def $0", "=v"() + %v105 = call i32 asm 
sideeffect "; def $0", "=v"() + %v106 = call i32 asm sideeffect "; def $0", "=v"() + %v107 = call i32 asm sideeffect "; def $0", "=v"() + %v108 = call i32 asm sideeffect "; def $0", "=v"() + %v109 = call i32 asm sideeffect "; def $0", "=v"() + %v110 = call i32 asm sideeffect "; def $0", "=v"() + %v111 = call i32 asm sideeffect "; def $0", "=v"() + %v112 = call i32 asm sideeffect "; def $0", "=v"() + %v113 = call i32 asm sideeffect "; def $0", "=v"() + %v114 = call i32 asm sideeffect "; def $0", "=v"() + %v115 = call i32 asm sideeffect "; def $0", "=v"() + %v116 = call i32 asm sideeffect "; def $0", "=v"() + %v117 = call i32 asm sideeffect "; def $0", "=v"() + %v118 = call i32 asm sideeffect "; def $0", "=v"() + %v119 = call i32 asm sideeffect "; def $0", "=v"() + %v120 = call i32 asm sideeffect "; def $0", "=v"() + %v121 = call i32 asm sideeffect "; def $0", "=v"() + %v122 = call i32 asm sideeffect "; def $0", "=v"() + %v123 = call i32 asm sideeffect "; def $0", "=v"() + %v124 = call i32 asm sideeffect "; def $0", "=v"() + %v125 = call i32 asm sideeffect "; def $0", "=v"() + %v126 = call i32 asm sideeffect "; def $0", "=v"() + %v127 = call i32 asm sideeffect "; def $0", "=v"() + %v128 = call i32 asm sideeffect "; def $0", "=v"() + %v129 = call i32 asm sideeffect "; def $0", "=v"() + %v130 = call i32 asm sideeffect "; def $0", "=v"() + %v131 = call i32 asm sideeffect "; def $0", "=v"() + %v132 = call i32 asm sideeffect "; def $0", "=v"() + %v133 = call i32 asm sideeffect "; def $0", "=v"() + %v134 = call i32 asm sideeffect "; def $0", "=v"() + %v135 = call i32 asm sideeffect "; def $0", "=v"() + %v136 = call i32 asm sideeffect "; def $0", "=v"() + %v137 = call i32 asm sideeffect "; def $0", "=v"() + %v138 = call i32 asm sideeffect "; def $0", "=v"() + %v139 = call i32 asm sideeffect "; def $0", "=v"() + %v140 = call i32 asm sideeffect "; def $0", "=v"() + %v141 = call i32 asm sideeffect "; def $0", "=v"() + %v142 = call i32 asm sideeffect "; def $0", "=v"() + %v143 
= call i32 asm sideeffect "; def $0", "=v"() + %v144 = call i32 asm sideeffect "; def $0", "=v"() + %v145 = call i32 asm sideeffect "; def $0", "=v"() + %v146 = call i32 asm sideeffect "; def $0", "=v"() + %v147 = call i32 asm sideeffect "; def $0", "=v"() + %v148 = call i32 asm sideeffect "; def $0", "=v"() + %v149 = call i32 asm sideeffect "; def $0", "=v"() + %v150 = call i32 asm sideeffect "; def $0", "=v"() + %v151 = call i32 asm sideeffect "; def $0", "=v"() + %v152 = call i32 asm sideeffect "; def $0", "=v"() + %v153 = call i32 asm sideeffect "; def $0", "=v"() + %v154 = call i32 asm sideeffect "; def $0", "=v"() + %v155 = call i32 asm sideeffect "; def $0", "=v"() + %v156 = call i32 asm sideeffect "; def $0", "=v"() + %v157 = call i32 asm sideeffect "; def $0", "=v"() + %v158 = call i32 asm sideeffect "; def $0", "=v"() + %v159 = call i32 asm sideeffect "; def $0", "=v"() + %v160 = call i32 asm sideeffect "; def $0", "=v"() + %v161 = call i32 asm sideeffect "; def $0", "=v"() + %v162 = call i32 asm sideeffect "; def $0", "=v"() + %v163 = call i32 asm sideeffect "; def $0", "=v"() + %v164 = call i32 asm sideeffect "; def $0", "=v"() + %v165 = call i32 asm sideeffect "; def $0", "=v"() + %v166 = call i32 asm sideeffect "; def $0", "=v"() + %v167 = call i32 asm sideeffect "; def $0", "=v"() + %v168 = call i32 asm sideeffect "; def $0", "=v"() + %v169 = call i32 asm sideeffect "; def $0", "=v"() + %v170 = call i32 asm sideeffect "; def $0", "=v"() + %v171 = call i32 asm sideeffect "; def $0", "=v"() + %v172 = call i32 asm sideeffect "; def $0", "=v"() + %v173 = call i32 asm sideeffect "; def $0", "=v"() + %v174 = call i32 asm sideeffect "; def $0", "=v"() + %v175 = call i32 asm sideeffect "; def $0", "=v"() + %v176 = call i32 asm sideeffect "; def $0", "=v"() + %v177 = call i32 asm sideeffect "; def $0", "=v"() + %v178 = call i32 asm sideeffect "; def $0", "=v"() + %v179 = call i32 asm sideeffect "; def $0", "=v"() + %v180 = call i32 asm sideeffect "; def $0", 
"=v"() + %v181 = call i32 asm sideeffect "; def $0", "=v"() + %v182 = call i32 asm sideeffect "; def $0", "=v"() + %v183 = call i32 asm sideeffect "; def $0", "=v"() + %v184 = call i32 asm sideeffect "; def $0", "=v"() + %v185 = call i32 asm sideeffect "; def $0", "=v"() + %v186 = call i32 asm sideeffect "; def $0", "=v"() + %v187 = call i32 asm sideeffect "; def $0", "=v"() + %v188 = call i32 asm sideeffect "; def $0", "=v"() + %v189 = call i32 asm sideeffect "; def $0", "=v"() + %v190 = call i32 asm sideeffect "; def $0", "=v"() + %v191 = call i32 asm sideeffect "; def $0", "=v"() + %v192 = call i32 asm sideeffect "; def $0", "=v"() + %v193 = call i32 asm sideeffect "; def $0", "=v"() + %v194 = call i32 asm sideeffect "; def $0", "=v"() + %v195 = call i32 asm sideeffect "; def $0", "=v"() + %v196 = call i32 asm sideeffect "; def $0", "=v"() + %v197 = call i32 asm sideeffect "; def $0", "=v"() + %v198 = call i32 asm sideeffect "; def $0", "=v"() + %v199 = call i32 asm sideeffect "; def $0", "=v"() + %v200 = call i32 asm sideeffect "; def $0", "=v"() + %v201 = call i32 asm sideeffect "; def $0", "=v"() + %v202 = call i32 asm sideeffect "; def $0", "=v"() + %v203 = call i32 asm sideeffect "; def $0", "=v"() + %v204 = call i32 asm sideeffect "; def $0", "=v"() + %v205 = call i32 asm sideeffect "; def $0", "=v"() + %v206 = call i32 asm sideeffect "; def $0", "=v"() + %v207 = call i32 asm sideeffect "; def $0", "=v"() + %v208 = call i32 asm sideeffect "; def $0", "=v"() + %v209 = call i32 asm sideeffect "; def $0", "=v"() + %v210 = call i32 asm sideeffect "; def $0", "=v"() + %v211 = call i32 asm sideeffect "; def $0", "=v"() + %v212 = call i32 asm sideeffect "; def $0", "=v"() + %v213 = call i32 asm sideeffect "; def $0", "=v"() + %v214 = call i32 asm sideeffect "; def $0", "=v"() + %v215 = call i32 asm sideeffect "; def $0", "=v"() + %v216 = call i32 asm sideeffect "; def $0", "=v"() + %v217 = call i32 asm sideeffect "; def $0", "=v"() + %v218 = call i32 asm 
sideeffect "; def $0", "=v"() + %v219 = call i32 asm sideeffect "; def $0", "=v"() + %v220 = call i32 asm sideeffect "; def $0", "=v"() + %v221 = call i32 asm sideeffect "; def $0", "=v"() + %v222 = call i32 asm sideeffect "; def $0", "=v"() + %v223 = call i32 asm sideeffect "; def $0", "=v"() + %v224 = call i32 asm sideeffect "; def $0", "=v"() + %v225 = call i32 asm sideeffect "; def $0", "=v"() + %v226 = call i32 asm sideeffect "; def $0", "=v"() + %v227 = call i32 asm sideeffect "; def $0", "=v"() + %v228 = call i32 asm sideeffect "; def $0", "=v"() + %v229 = call i32 asm sideeffect "; def $0", "=v"() + %v230 = call i32 asm sideeffect "; def $0", "=v"() + %v231 = call i32 asm sideeffect "; def $0", "=v"() + %v232 = call i32 asm sideeffect "; def $0", "=v"() + %v233 = call i32 asm sideeffect "; def $0", "=v"() + %v234 = call i32 asm sideeffect "; def $0", "=v"() + %v235 = call i32 asm sideeffect "; def $0", "=v"() + %v236 = call i32 asm sideeffect "; def $0", "=v"() + %v237 = call i32 asm sideeffect "; def $0", "=v"() + %v238 = call i32 asm sideeffect "; def $0", "=v"() + %v239 = call i32 asm sideeffect "; def $0", "=v"() + %v240 = call i32 asm sideeffect "; def $0", "=v"() + %v241 = call i32 asm sideeffect "; def $0", "=v"() + %v242 = call i32 asm sideeffect "; def $0", "=v"() + %v243 = call i32 asm sideeffect "; def $0", "=v"() + %v244 = call i32 asm sideeffect "; def $0", "=v"() + %v245 = call i32 asm sideeffect "; def $0", "=v"() + %v246 = call i32 asm sideeffect "; def $0", "=v"() + %v247 = call i32 asm sideeffect "; def $0", "=v"() + %v248 = call i32 asm sideeffect "; def $0", "=v"() + %v249 = call i32 asm sideeffect "; def $0", "=v"() + %v250 = call i32 asm sideeffect "; def $0", "=v"() + %v251 = call i32 asm sideeffect "; def $0", "=v"() + %v252 = call i32 asm sideeffect "; def $0", "=v"() + %v253 = call i32 asm sideeffect "; def $0", "=v"() + %v254 = call i32 asm sideeffect "; def $0", "=v"() + %v255 = call i32 asm sideeffect "; def $0", "=v"() + call 
void asm sideeffect "; use $0", "v"(i32 %v0) + call void asm sideeffect "; use $0", "v"(i32 %v1) + call void asm sideeffect "; use $0", "v"(i32 %v2) + call void asm sideeffect "; use $0", "v"(i32 %v3) + call void asm sideeffect "; use $0", "v"(i32 %v4) + call void asm sideeffect "; use $0", "v"(i32 %v5) + call void asm sideeffect "; use $0", "v"(i32 %v6) + call void asm sideeffect "; use $0", "v"(i32 %v7) + call void asm sideeffect "; use $0", "v"(i32 %v8) + call void asm sideeffect "; use $0", "v"(i32 %v9) + call void asm sideeffect "; use $0", "v"(i32 %v10) + call void asm sideeffect "; use $0", "v"(i32 %v11) + call void asm sideeffect "; use $0", "v"(i32 %v12) + call void asm sideeffect "; use $0", "v"(i32 %v13) + call void asm sideeffect "; use $0", "v"(i32 %v14) + call void asm sideeffect "; use $0", "v"(i32 %v15) + call void asm sideeffect "; use $0", "v"(i32 %v16) + call void asm sideeffect "; use $0", "v"(i32 %v17) + call void asm sideeffect "; use $0", "v"(i32 %v18) + call void asm sideeffect "; use $0", "v"(i32 %v19) + call void asm sideeffect "; use $0", "v"(i32 %v20) + call void asm sideeffect "; use $0", "v"(i32 %v21) + call void asm sideeffect "; use $0", "v"(i32 %v22) + call void asm sideeffect "; use $0", "v"(i32 %v23) + call void asm sideeffect "; use $0", "v"(i32 %v24) + call void asm sideeffect "; use $0", "v"(i32 %v25) + call void asm sideeffect "; use $0", "v"(i32 %v26) + call void asm sideeffect "; use $0", "v"(i32 %v27) + call void asm sideeffect "; use $0", "v"(i32 %v28) + call void asm sideeffect "; use $0", "v"(i32 %v29) + call void asm sideeffect "; use $0", "v"(i32 %v30) + call void asm sideeffect "; use $0", "v"(i32 %v31) + call void asm sideeffect "; use $0", "v"(i32 %v32) + call void asm sideeffect "; use $0", "v"(i32 %v33) + call void asm sideeffect "; use $0", "v"(i32 %v34) + call void asm sideeffect "; use $0", "v"(i32 %v35) + call void asm sideeffect "; use $0", "v"(i32 %v36) + call void asm sideeffect "; use $0", "v"(i32 %v37) + 
call void asm sideeffect "; use $0", "v"(i32 %v38) + call void asm sideeffect "; use $0", "v"(i32 %v39) + call void asm sideeffect "; use $0", "v"(i32 %v40) + call void asm sideeffect "; use $0", "v"(i32 %v41) + call void asm sideeffect "; use $0", "v"(i32 %v42) + call void asm sideeffect "; use $0", "v"(i32 %v43) + call void asm sideeffect "; use $0", "v"(i32 %v44) + call void asm sideeffect "; use $0", "v"(i32 %v45) + call void asm sideeffect "; use $0", "v"(i32 %v46) + call void asm sideeffect "; use $0", "v"(i32 %v47) + call void asm sideeffect "; use $0", "v"(i32 %v48) + call void asm sideeffect "; use $0", "v"(i32 %v49) + call void asm sideeffect "; use $0", "v"(i32 %v50) + call void asm sideeffect "; use $0", "v"(i32 %v51) + call void asm sideeffect "; use $0", "v"(i32 %v52) + call void asm sideeffect "; use $0", "v"(i32 %v53) + call void asm sideeffect "; use $0", "v"(i32 %v54) + call void asm sideeffect "; use $0", "v"(i32 %v55) + call void asm sideeffect "; use $0", "v"(i32 %v56) + call void asm sideeffect "; use $0", "v"(i32 %v57) + call void asm sideeffect "; use $0", "v"(i32 %v58) + call void asm sideeffect "; use $0", "v"(i32 %v59) + call void asm sideeffect "; use $0", "v"(i32 %v60) + call void asm sideeffect "; use $0", "v"(i32 %v61) + call void asm sideeffect "; use $0", "v"(i32 %v62) + call void asm sideeffect "; use $0", "v"(i32 %v63) + call void asm sideeffect "; use $0", "v"(i32 %v64) + call void asm sideeffect "; use $0", "v"(i32 %v65) + call void asm sideeffect "; use $0", "v"(i32 %v66) + call void asm sideeffect "; use $0", "v"(i32 %v67) + call void asm sideeffect "; use $0", "v"(i32 %v68) + call void asm sideeffect "; use $0", "v"(i32 %v69) + call void asm sideeffect "; use $0", "v"(i32 %v70) + call void asm sideeffect "; use $0", "v"(i32 %v71) + call void asm sideeffect "; use $0", "v"(i32 %v72) + call void asm sideeffect "; use $0", "v"(i32 %v73) + call void asm sideeffect "; use $0", "v"(i32 %v74) + call void asm sideeffect "; use $0", 
"v"(i32 %v75) + call void asm sideeffect "; use $0", "v"(i32 %v76) + call void asm sideeffect "; use $0", "v"(i32 %v77) + call void asm sideeffect "; use $0", "v"(i32 %v78) + call void asm sideeffect "; use $0", "v"(i32 %v79) + call void asm sideeffect "; use $0", "v"(i32 %v80) + call void asm sideeffect "; use $0", "v"(i32 %v81) + call void asm sideeffect "; use $0", "v"(i32 %v82) + call void asm sideeffect "; use $0", "v"(i32 %v83) + call void asm sideeffect "; use $0", "v"(i32 %v84) + call void asm sideeffect "; use $0", "v"(i32 %v85) + call void asm sideeffect "; use $0", "v"(i32 %v86) + call void asm sideeffect "; use $0", "v"(i32 %v87) + call void asm sideeffect "; use $0", "v"(i32 %v88) + call void asm sideeffect "; use $0", "v"(i32 %v89) + call void asm sideeffect "; use $0", "v"(i32 %v90) + call void asm sideeffect "; use $0", "v"(i32 %v91) + call void asm sideeffect "; use $0", "v"(i32 %v92) + call void asm sideeffect "; use $0", "v"(i32 %v93) + call void asm sideeffect "; use $0", "v"(i32 %v94) + call void asm sideeffect "; use $0", "v"(i32 %v95) + call void asm sideeffect "; use $0", "v"(i32 %v96) + call void asm sideeffect "; use $0", "v"(i32 %v97) + call void asm sideeffect "; use $0", "v"(i32 %v98) + call void asm sideeffect "; use $0", "v"(i32 %v99) + call void asm sideeffect "; use $0", "v"(i32 %v100) + call void asm sideeffect "; use $0", "v"(i32 %v101) + call void asm sideeffect "; use $0", "v"(i32 %v102) + call void asm sideeffect "; use $0", "v"(i32 %v103) + call void asm sideeffect "; use $0", "v"(i32 %v104) + call void asm sideeffect "; use $0", "v"(i32 %v105) + call void asm sideeffect "; use $0", "v"(i32 %v106) + call void asm sideeffect "; use $0", "v"(i32 %v107) + call void asm sideeffect "; use $0", "v"(i32 %v108) + call void asm sideeffect "; use $0", "v"(i32 %v109) + call void asm sideeffect "; use $0", "v"(i32 %v110) + call void asm sideeffect "; use $0", "v"(i32 %v111) + call void asm sideeffect "; use $0", "v"(i32 %v112) + call void 
asm sideeffect "; use $0", "v"(i32 %v113) + call void asm sideeffect "; use $0", "v"(i32 %v114) + call void asm sideeffect "; use $0", "v"(i32 %v115) + call void asm sideeffect "; use $0", "v"(i32 %v116) + call void asm sideeffect "; use $0", "v"(i32 %v117) + call void asm sideeffect "; use $0", "v"(i32 %v118) + call void asm sideeffect "; use $0", "v"(i32 %v119) + call void asm sideeffect "; use $0", "v"(i32 %v120) + call void asm sideeffect "; use $0", "v"(i32 %v121) + call void asm sideeffect "; use $0", "v"(i32 %v122) + call void asm sideeffect "; use $0", "v"(i32 %v123) + call void asm sideeffect "; use $0", "v"(i32 %v124) + call void asm sideeffect "; use $0", "v"(i32 %v125) + call void asm sideeffect "; use $0", "v"(i32 %v126) + call void asm sideeffect "; use $0", "v"(i32 %v127) + call void asm sideeffect "; use $0", "v"(i32 %v128) + call void asm sideeffect "; use $0", "v"(i32 %v129) + call void asm sideeffect "; use $0", "v"(i32 %v130) + call void asm sideeffect "; use $0", "v"(i32 %v131) + call void asm sideeffect "; use $0", "v"(i32 %v132) + call void asm sideeffect "; use $0", "v"(i32 %v133) + call void asm sideeffect "; use $0", "v"(i32 %v134) + call void asm sideeffect "; use $0", "v"(i32 %v135) + call void asm sideeffect "; use $0", "v"(i32 %v136) + call void asm sideeffect "; use $0", "v"(i32 %v137) + call void asm sideeffect "; use $0", "v"(i32 %v138) + call void asm sideeffect "; use $0", "v"(i32 %v139) + call void asm sideeffect "; use $0", "v"(i32 %v140) + call void asm sideeffect "; use $0", "v"(i32 %v141) + call void asm sideeffect "; use $0", "v"(i32 %v142) + call void asm sideeffect "; use $0", "v"(i32 %v143) + call void asm sideeffect "; use $0", "v"(i32 %v144) + call void asm sideeffect "; use $0", "v"(i32 %v145) + call void asm sideeffect "; use $0", "v"(i32 %v146) + call void asm sideeffect "; use $0", "v"(i32 %v147) + call void asm sideeffect "; use $0", "v"(i32 %v148) + call void asm sideeffect "; use $0", "v"(i32 %v149) + call void 
asm sideeffect "; use $0", "v"(i32 %v150) + call void asm sideeffect "; use $0", "v"(i32 %v151) + call void asm sideeffect "; use $0", "v"(i32 %v152) + call void asm sideeffect "; use $0", "v"(i32 %v153) + call void asm sideeffect "; use $0", "v"(i32 %v154) + call void asm sideeffect "; use $0", "v"(i32 %v155) + call void asm sideeffect "; use $0", "v"(i32 %v156) + call void asm sideeffect "; use $0", "v"(i32 %v157) + call void asm sideeffect "; use $0", "v"(i32 %v158) + call void asm sideeffect "; use $0", "v"(i32 %v159) + call void asm sideeffect "; use $0", "v"(i32 %v160) + call void asm sideeffect "; use $0", "v"(i32 %v161) + call void asm sideeffect "; use $0", "v"(i32 %v162) + call void asm sideeffect "; use $0", "v"(i32 %v163) + call void asm sideeffect "; use $0", "v"(i32 %v164) + call void asm sideeffect "; use $0", "v"(i32 %v165) + call void asm sideeffect "; use $0", "v"(i32 %v166) + call void asm sideeffect "; use $0", "v"(i32 %v167) + call void asm sideeffect "; use $0", "v"(i32 %v168) + call void asm sideeffect "; use $0", "v"(i32 %v169) + call void asm sideeffect "; use $0", "v"(i32 %v170) + call void asm sideeffect "; use $0", "v"(i32 %v171) + call void asm sideeffect "; use $0", "v"(i32 %v172) + call void asm sideeffect "; use $0", "v"(i32 %v173) + call void asm sideeffect "; use $0", "v"(i32 %v174) + call void asm sideeffect "; use $0", "v"(i32 %v175) + call void asm sideeffect "; use $0", "v"(i32 %v176) + call void asm sideeffect "; use $0", "v"(i32 %v177) + call void asm sideeffect "; use $0", "v"(i32 %v178) + call void asm sideeffect "; use $0", "v"(i32 %v179) + call void asm sideeffect "; use $0", "v"(i32 %v180) + call void asm sideeffect "; use $0", "v"(i32 %v181) + call void asm sideeffect "; use $0", "v"(i32 %v182) + call void asm sideeffect "; use $0", "v"(i32 %v183) + call void asm sideeffect "; use $0", "v"(i32 %v184) + call void asm sideeffect "; use $0", "v"(i32 %v185) + call void asm sideeffect "; use $0", "v"(i32 %v186) + call void 
asm sideeffect "; use $0", "v"(i32 %v187) + call void asm sideeffect "; use $0", "v"(i32 %v188) + call void asm sideeffect "; use $0", "v"(i32 %v189) + call void asm sideeffect "; use $0", "v"(i32 %v190) + call void asm sideeffect "; use $0", "v"(i32 %v191) + call void asm sideeffect "; use $0", "v"(i32 %v192) + call void asm sideeffect "; use $0", "v"(i32 %v193) + call void asm sideeffect "; use $0", "v"(i32 %v194) + call void asm sideeffect "; use $0", "v"(i32 %v195) + call void asm sideeffect "; use $0", "v"(i32 %v196) + call void asm sideeffect "; use $0", "v"(i32 %v197) + call void asm sideeffect "; use $0", "v"(i32 %v198) + call void asm sideeffect "; use $0", "v"(i32 %v199) + call void asm sideeffect "; use $0", "v"(i32 %v200) + call void asm sideeffect "; use $0", "v"(i32 %v201) + call void asm sideeffect "; use $0", "v"(i32 %v202) + call void asm sideeffect "; use $0", "v"(i32 %v203) + call void asm sideeffect "; use $0", "v"(i32 %v204) + call void asm sideeffect "; use $0", "v"(i32 %v205) + call void asm sideeffect "; use $0", "v"(i32 %v206) + call void asm sideeffect "; use $0", "v"(i32 %v207) + call void asm sideeffect "; use $0", "v"(i32 %v208) + call void asm sideeffect "; use $0", "v"(i32 %v209) + call void asm sideeffect "; use $0", "v"(i32 %v210) + call void asm sideeffect "; use $0", "v"(i32 %v211) + call void asm sideeffect "; use $0", "v"(i32 %v212) + call void asm sideeffect "; use $0", "v"(i32 %v213) + call void asm sideeffect "; use $0", "v"(i32 %v214) + call void asm sideeffect "; use $0", "v"(i32 %v215) + call void asm sideeffect "; use $0", "v"(i32 %v216) + call void asm sideeffect "; use $0", "v"(i32 %v217) + call void asm sideeffect "; use $0", "v"(i32 %v218) + call void asm sideeffect "; use $0", "v"(i32 %v219) + call void asm sideeffect "; use $0", "v"(i32 %v220) + call void asm sideeffect "; use $0", "v"(i32 %v221) + call void asm sideeffect "; use $0", "v"(i32 %v222) + call void asm sideeffect "; use $0", "v"(i32 %v223) + call void 
asm sideeffect "; use $0", "v"(i32 %v224) + call void asm sideeffect "; use $0", "v"(i32 %v225) + call void asm sideeffect "; use $0", "v"(i32 %v226) + call void asm sideeffect "; use $0", "v"(i32 %v227) + call void asm sideeffect "; use $0", "v"(i32 %v228) + call void asm sideeffect "; use $0", "v"(i32 %v229) + call void asm sideeffect "; use $0", "v"(i32 %v230) + call void asm sideeffect "; use $0", "v"(i32 %v231) + call void asm sideeffect "; use $0", "v"(i32 %v232) + call void asm sideeffect "; use $0", "v"(i32 %v233) + call void asm sideeffect "; use $0", "v"(i32 %v234) + call void asm sideeffect "; use $0", "v"(i32 %v235) + call void asm sideeffect "; use $0", "v"(i32 %v236) + call void asm sideeffect "; use $0", "v"(i32 %v237) + call void asm sideeffect "; use $0", "v"(i32 %v238) + call void asm sideeffect "; use $0", "v"(i32 %v239) + call void asm sideeffect "; use $0", "v"(i32 %v240) + call void asm sideeffect "; use $0", "v"(i32 %v241) + call void asm sideeffect "; use $0", "v"(i32 %v242) + call void asm sideeffect "; use $0", "v"(i32 %v243) + call void asm sideeffect "; use $0", "v"(i32 %v244) + call void asm sideeffect "; use $0", "v"(i32 %v245) + call void asm sideeffect "; use $0", "v"(i32 %v246) + call void asm sideeffect "; use $0", "v"(i32 %v247) + call void asm sideeffect "; use $0", "v"(i32 %v248) + call void asm sideeffect "; use $0", "v"(i32 %v249) + call void asm sideeffect "; use $0", "v"(i32 %v250) + call void asm sideeffect "; use $0", "v"(i32 %v251) + call void asm sideeffect "; use $0", "v"(i32 %v252) + call void asm sideeffect "; use $0", "v"(i32 %v253) + call void asm sideeffect "; use $0", "v"(i32 %v254) + call void asm sideeffect "; use $0", "v"(i32 %v255) + ret void +} + +; Helper: defines eight <32 x i32> values through inline asm with "=v" outputs, calls @use256vgprs while they are still needed, then consumes each value through inline asm with a "v" input. +define internal void @use512vgprs() { + %v0 = call <32 x i32> asm sideeffect "; def $0", "=v"() + %v1 = call <32 x i32> asm sideeffect "; def $0", "=v"() + %v2 = call <32 x i32> asm sideeffect "; def $0", "=v"() + %v3 = call <32 x i32> asm sideeffect "; def $0", 
"=v"() + %v4 = call <32 x i32> asm sideeffect "; def $0", "=v"() + %v5 = call <32 x i32> asm sideeffect "; def $0", "=v"() + %v6 = call <32 x i32> asm sideeffect "; def $0", "=v"() + %v7 = call <32 x i32> asm sideeffect "; def $0", "=v"() + call void @use256vgprs() + call void asm sideeffect "; use $0", "v"(<32 x i32> %v0) + call void asm sideeffect "; use $0", "v"(<32 x i32> %v1) + call void asm sideeffect "; use $0", "v"(<32 x i32> %v2) + call void asm sideeffect "; use $0", "v"(<32 x i32> %v3) + call void asm sideeffect "; use $0", "v"(<32 x i32> %v4) + call void asm sideeffect "; use $0", "v"(<32 x i32> %v5) + call void asm sideeffect "; use $0", "v"(<32 x i32> %v6) + call void asm sideeffect "; use $0", "v"(<32 x i32> %v7) + ret void +} + +; Empty function; attribute group #0 below marks it noinline. +define void @foo() #0 { + ret void +} + +attributes #0 = { noinline } + +; The kernels below mostly come in pairs sharing one attribute group: the first of a pair calls the noinline @foo before the VGPR-heavy helper, while the *_no_agprs one calls only the helper. The check lines ahead of each kernel state the register counts expected for it. +; GCN-LABEL: {{^}}k256_w8: +; GFX90A: NumVgprs: 32 +; GFX90A: NumAgprs: 32 +; GFX90A: TotalNumVgprs: 64 +define amdgpu_kernel void @k256_w8() #2568 { + call void @foo() + call void @use256vgprs() + ret void +} + +; GCN-LABEL: {{^}}k256_w8_no_agprs: +; GFX90A: NumVgprs: 64 +; GFX90A: NumAgprs: 0 +; GFX90A: TotalNumVgprs: 64 +define amdgpu_kernel void @k256_w8_no_agprs() #2568 { + call void @use256vgprs() + ret void +} + +attributes #2568 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="8" } + +; GCN-LABEL: {{^}}k256_w4: +; GFX90A: NumVgprs: 64 +; GFX90A: NumAgprs: 64 +; GFX90A: TotalNumVgprs: 128 +define amdgpu_kernel void @k256_w4() #2564 { + call void @foo() + call void @use256vgprs() + ret void +} + +; GCN-LABEL: {{^}}k256_w4_no_agprs: +; GFX90A: NumVgprs: 128 +; GFX90A: NumAgprs: 0 +; GFX90A: TotalNumVgprs: 128 +define amdgpu_kernel void @k256_w4_no_agprs() #2564 { + call void @use256vgprs() + ret void +} + +attributes #2564 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="4" } + +; GCN-LABEL: {{^}}k256_w2: +; GFX90A: NumVgprs: 128 +; GFX90A: NumAgprs: 128 +; GFX90A: TotalNumVgprs: 256 +define 
amdgpu_kernel void @k256_w2() #2562 { + call void @foo() + call void @use256vgprs() + ret void +} + +; GCN-LABEL: {{^}}k256_w2_no_agprs: +; GFX90A: NumVgprs: 256 +; GFX90A: NumAgprs: 0 +; GFX90A: TotalNumVgprs: 256 +define amdgpu_kernel void @k256_w2_no_agprs() #2562 { + call void @use256vgprs() + ret void +} + +attributes #2562 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2" } + +; GCN-LABEL: {{^}}k256_w1: +; GFX90A: NumVgprs: 256 +; GFX90A: NumAgprs: 256 +; GFX90A: TotalNumVgprs: 512 +define amdgpu_kernel void @k256_w1() #2561 { + call void @foo() + call void @use512vgprs() + ret void +} + +; NOTE(review): unlike the other *_no_agprs kernels, the counts expected here equal those of @k256_w1 (AGPR count nonzero) - confirm this is intended. +; GCN-LABEL: {{^}}k256_w1_no_agprs: +; GFX90A: NumVgprs: 256 +; GFX90A: NumAgprs: 256 +; GFX90A: TotalNumVgprs: 512 +define amdgpu_kernel void @k256_w1_no_agprs() #2561 { + call void @use512vgprs() + ret void +} + +attributes #2561 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="1" } + +; GCN-LABEL: {{^}}k512_no_agprs: +; GFX90A: NumVgprs: 256 +; GFX90A: NumAgprs: 0 +; GFX90A: TotalNumVgprs: 256 +define amdgpu_kernel void @k512_no_agprs() #512 { + call void @use256vgprs() + ret void +} + +; GCN-LABEL: {{^}}k512_call: +; GFX90A: NumVgprs: 128 +; GFX90A: NumAgprs: 128 +; GFX90A: TotalNumVgprs: 256 +define amdgpu_kernel void @k512_call() #512 { + call void @foo() + call void @use256vgprs() + ret void +} + +; Defines an i32 through inline asm with an "=a" output constraint (presumably an AGPR, per the kernel name) before calling the helper. +; GCN-LABEL: {{^}}k512_virtual_agpr: +; GFX90A: NumVgprs: 128 +; GFX90A: NumAgprs: 128 +; GFX90A: TotalNumVgprs: 256 +define amdgpu_kernel void @k512_virtual_agpr() #512 { + %a0 = call i32 asm sideeffect "; def $0", "=a"() + call void @use256vgprs() + ret void +} + +; Issues inline asm with an empty body that lists a8 as clobbered, then calls the helper. +; GCN-LABEL: {{^}}k512_physical_agpr: +; GFX90A: NumVgprs: 128 +; GFX90A: NumAgprs: 128 +; GFX90A: TotalNumVgprs: 256 +define amdgpu_kernel void @k512_physical_agpr() #512 { + call void asm sideeffect "", "~{a8}" () + call void @use256vgprs() + ret void +} + +; Declared without the amdgpu_kernel calling convention; its counts are matched by pattern rather than by exact value. +; GCN-LABEL: {{^}}f512: +; GFX90A: NumVgprs: 12{{[0-9]}} +; GFX90A: NumAgprs: {{[1-9]}} +define void 
@f512() #512 { + call void @use256vgprs() + ret void +} + +attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" } + +; GCN-LABEL: {{^}}k1024: +; GFX90A: NumVgprs: 128 +; GFX90A: NumAgprs: 0 +; GFX90A: TotalNumVgprs: 128 +define amdgpu_kernel void @k1024() #1024 { + call void @use256vgprs() + ret void +} + +; GCN-LABEL: {{^}}k1024_call: +; GFX90A: NumVgprs: 64 +; GFX90A: NumAgprs: 64 +; GFX90A: TotalNumVgprs: 128 +define amdgpu_kernel void @k1024_call() #1024 { + call void @foo() + call void @use256vgprs() + ret void +} + +attributes #1024 = { nounwind "amdgpu-flat-work-group-size"="1024,1024" }