diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index e2dd4976f39065..f2fdf6564b723f 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -160,6 +160,11 @@ bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx);
 Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                           const TargetLibraryInfo *TLI);
 
+/// Returns the VP intrinsic ID for a call, if one exists.
+/// For the given call instruction, finds the matching vector-predicated
+/// intrinsic and returns its ID; otherwise returns Intrinsic::not_intrinsic.
+Intrinsic::ID getVPIntrinsicIDForCall(const CallInst *CI);
+
 /// Given a vector and an element number, see if the scalar value is
 /// already around as a register, for example if it were inserted then extracted
 /// from the vector.
diff --git a/llvm/include/llvm/IR/VectorBuilder.h b/llvm/include/llvm/IR/VectorBuilder.h
index b0277c2b52595e..31a64eddada877 100644
--- a/llvm/include/llvm/IR/VectorBuilder.h
+++ b/llvm/include/llvm/IR/VectorBuilder.h
@@ -99,11 +99,11 @@ class VectorBuilder {
                                  const Twine &Name = Twine());
 
-  /// Emit a VP reduction intrinsic call for recurrence kind.
-  /// \param RdxID The intrinsic ID of llvm.vector.reduce.*
+  /// Emit a VP intrinsic call that corresponds to the given intrinsic ID.
+  /// \param ID The intrinsic ID of the intrinsic to map to its VP form
   /// \param ValTy The type of operand which the reduction operation is
   ///              performed.
   /// \param VecOpArray The operand list.
-  Value *createSimpleReduction(Intrinsic::ID RdxID, Type *ValTy,
+  Value *createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
                                ArrayRef<Value *> VecOpArray,
                                const Twine &Name = Twine());
 };
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index dbffbb8a5f81d9..70e0a15ac414e2 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -169,6 +169,15 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
   return Intrinsic::not_intrinsic;
 }
 
+Intrinsic::ID llvm::getVPIntrinsicIDForCall(const CallInst *CI) {
+  // Only direct calls to intrinsics can have a VP counterpart.
+  const Function *F = CI->getCalledFunction();
+  if (!F || !F->isIntrinsic())
+    return Intrinsic::not_intrinsic;
+
+  return VPIntrinsic::getForIntrinsic(F->getIntrinsicID());
+}
+
 /// Given a vector and an element number, see if the scalar value is
 /// already around as a register, for example if it were inserted then extracted
 /// from the vector.
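For context, a minimal usage sketch of the new helper; the wrapper name below is hypothetical and not part of the patch. A direct call to an intrinsic with a VP counterpart (e.g. llvm.smax) yields the matching llvm.vp.smax ID, while indirect calls and unmapped intrinsics yield Intrinsic::not_intrinsic.

```cpp
// Hypothetical caller illustrating the contract of getVPIntrinsicIDForCall;
// this is a sketch, not code from the patch.
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// True when CI can be re-emitted as a vector-predicated intrinsic, e.g. a
// call to llvm.smax maps to llvm.vp.smax; indirect calls and intrinsics
// without a VP form return Intrinsic::not_intrinsic.
static bool hasVPCounterpart(const CallInst *CI) {
  return getVPIntrinsicIDForCall(CI) != Intrinsic::not_intrinsic;
}
```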
diff --git a/llvm/lib/IR/VectorBuilder.cpp b/llvm/lib/IR/VectorBuilder.cpp
index 737f49b1334d76..d629a2fb6af7b3 100644
--- a/llvm/lib/IR/VectorBuilder.cpp
+++ b/llvm/lib/IR/VectorBuilder.cpp
@@ -60,13 +60,12 @@ Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy,
   return createVectorInstructionImpl(VPID, ReturnTy, InstOpArray, Name);
 }
 
-Value *VectorBuilder::createSimpleReduction(Intrinsic::ID RdxID,
-                                            Type *ValTy,
+Value *VectorBuilder::createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
                                             ArrayRef<Value *> InstOpArray,
                                             const Twine &Name) {
-  auto VPID = VPIntrinsic::getForIntrinsic(RdxID);
-  assert(VPReductionIntrinsic::isVPReduction(VPID) &&
-         "No VPIntrinsic for this reduction");
+  auto VPID = VPIntrinsic::getForIntrinsic(ID);
+  assert(VPIntrinsic::isVPIntrinsic(VPID) &&
+         "No VPIntrinsic for this intrinsic");
   return createVectorInstructionImpl(VPID, ValTy, InstOpArray, Name);
 }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index cba73abdd15028..20b26fdacf8642 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1073,6 +1073,14 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
       return getArithmeticInstrCost(*FOp, ICA.getReturnType(), CostKind);
     break;
   }
+  // TODO: Model the actual cost of these VP min/max intrinsics (e.g. from
+  // the legalized vector type) in a follow-up patch; for now, conservatively
+  // treat them as a single vector instruction.
+  case Intrinsic::vp_smax:
+  case Intrinsic::vp_smin:
+  case Intrinsic::vp_umax:
+  case Intrinsic::vp_umin:
+    return 1;
   // vp int cast ops.
   case Intrinsic::vp_trunc:
   case Intrinsic::vp_zext:
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 70047273c3b9af..2dac2d43f7f3a3 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1300,7 +1300,7 @@ Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
   Type *SrcEltTy = SrcTy->getElementType();
   Value *Iden = getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
   Value *Ops[] = {Iden, Src};
-  return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
+  return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
 }
 
 Value *llvm::createReduction(IRBuilderBase &B,
@@ -1343,7 +1343,7 @@ Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,
   Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);
   auto *SrcTy = cast<VectorType>(Src->getType());
   Value *Ops[] = {Start, Src};
-  return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
+  return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
 }
 
 void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8bf92f3480620a..9cabd5aceea804 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8351,7 +8351,6 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
     return nullptr;
 
   SmallVector<VPValue *> Ops(Operands.take_front(CI->arg_size()));
-  // Is it beneficial to perform intrinsic call compared to lib call?
   bool ShouldUseVectorIntrinsic =
       ID && LoopVectorizationPlanner::getDecisionAndClampRange(
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 4cef47e69f0e3b..3e6b08de8bdbe4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1708,6 +1708,22 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
   /// Returns true if the intrinsic may write to memory.
   bool mayWriteToMemory() const { return MayWriteToMemory; }
 
+  /// Returns the argument operands of the widened call; for VP intrinsics
+  /// this excludes the trailing EVL operand.
+  operand_range arg_operands() {
+    unsigned ArgNum = VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)
+                          ? getNumOperands() - 1
+                          : getNumOperands();
+    return make_range(op_begin(), op_begin() + ArgNum);
+  }
+
+  const_operand_range arg_operands() const {
+    unsigned ArgNum = VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)
+                          ? getNumOperands() - 1
+                          : getNumOperands();
+    return make_range(op_begin(), op_begin() + ArgNum);
+  }
+
   /// Returns true if the intrinsic may have side-effects.
   bool mayHaveSideEffects() const { return MayHaveSideEffects; }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6fe30356e8c912..2d5d69ca21b2e1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -970,7 +970,7 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
   if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
     TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
   SmallVector<Value *> Args;
-  for (const auto &I : enumerate(operands())) {
+  for (const auto &I : enumerate(arg_operands())) {
     // Some intrinsics have a scalar argument - don't replace it with a
     // vector.
     Value *Arg;
@@ -983,18 +983,33 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
     Args.push_back(Arg);
   }
 
-  // Use vector version of the intrinsic.
-  Module *M = State.Builder.GetInsertBlock()->getModule();
-  Function *VectorF =
-      Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
-  assert(VectorF && "Can't retrieve vector intrinsic.");
-
+  CallInst *V = nullptr;
   auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
   SmallVector<OperandBundleDef, 1> OpBundles;
   if (CI)
     CI->getOperandBundlesAsDefs(OpBundles);
 
-  CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
+  if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
+    // Emit a vector-predicated call via VectorBuilder: an all-true mask
+    // and the EVL (taken from the recipe's last operand) are appended to
+    // the argument list by createSimpleIntrinsic.
+    IRBuilderBase &BuilderIR = State.Builder;
+    VectorBuilder VBuilder(BuilderIR);
+    Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
+    VBuilder.setMask(Mask).setEVL(
+        State.get(getOperand(getNumOperands() - 1), /*NeedsScalar=*/true));
+    auto *TyReturn = VectorType::get(getResultType(), State.VF);
+    Value *VPInst = VBuilder.createSimpleIntrinsic(VectorIntrinsicID, TyReturn,
+                                                   Args, "vp.call");
+    V = cast<CallInst>(VPInst);
+  } else {
+    // Use vector version of the intrinsic.
+    Module *M = State.Builder.GetInsertBlock()->getModule();
+    Function *VectorF =
+        Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
+    assert(VectorF && "Can't retrieve vector intrinsic.");
+    V = State.Builder.CreateCall(VectorF, Args, OpBundles);
+  }
 
   setFlags(V);
@@ -1013,7 +1028,7 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
   // clear Arguments.
   // TODO: Rework TTI interface to be independent of concrete IR values.
   SmallVector<const Value *> Arguments;
-  for (const auto &[Idx, Op] : enumerate(operands())) {
+  for (const auto &[Idx, Op] : enumerate(arg_operands())) {
     auto *V = Op->getUnderlyingValue();
     if (!V) {
       if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index faec08cac18751..3d0c475a06b457 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1381,6 +1381,17 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
                 return nullptr;
               return new VPWidenEVLRecipe(*W, EVL);
             })
+            .Case<VPWidenIntrinsicRecipe>(
+                [&](VPWidenIntrinsicRecipe *CInst) -> VPRecipeBase * {
+                  auto *CI = cast<CallInst>(CInst->getUnderlyingInstr());
+                  SmallVector<VPValue *> Ops(CInst->operands());
+                  Ops.push_back(&EVL);
+                  Intrinsic::ID VPID = getVPIntrinsicIDForCall(CI);
+                  if (VPID == Intrinsic::not_intrinsic)
+                    return nullptr;
+                  return new VPWidenIntrinsicRecipe(
+                      *CI, VPID, Ops, CI->getType(), CI->getDebugLoc());
+                })
            .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
              VPValue *NewMask = GetNewMask(Red->getCondOp());
              return new VPReductionEVLRecipe(*Red, EVL, NewMask);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
index 4970f6ac34928b..f4379b2d53348c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
@@ -27,7 +27,7 @@ define void @vp_smax(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMAX:%.+]]> = call llvm.smax(ir<[[LD1]]>, ir<[[LD2]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMAX:%.+]]> = call llvm.vp.smax(ir<[[LD1]]>, ir<[[LD2]]>, vp<[[EVL]]>)
 ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]>
@@ -39,20 +39,20 @@ define void @vp_smax(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: }
 
 entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
-  %0 = load i32, ptr %arrayidx, align 4
-  %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
-  %1 = load i32, ptr %arrayidx3, align 4
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
+  %0 = load i32, ptr %gep, align 4
+  %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
+  %1 = load i32, ptr %gep3, align 4
   %. = tail call i32 @llvm.smax.i32(i32 %0, i32 %1)
-  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
-  store i32 %., ptr %arrayidx11, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
-  br i1 %exitcond.not, label %exit, label %for.body
+  %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %., ptr %gep11, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %exit, label %loop
 
 exit:
   ret void
@@ -80,7 +80,7 @@ define void @vp_smin(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMIN:%.+]]> = call llvm.smin(ir<[[LD1]]>, ir<[[LD2]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMIN:%.+]]> = call llvm.vp.smin(ir<[[LD1]]>, ir<[[LD2]]>, vp<[[EVL]]>)
 ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]>
@@ -92,20 +92,20 @@ define void @vp_smin(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: }
 
 entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
-  %0 = load i32, ptr %arrayidx, align 4
-  %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
-  %1 = load i32, ptr %arrayidx3, align 4
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
+  %0 = load i32, ptr %gep, align 4
+  %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
+  %1 = load i32, ptr %gep3, align 4
   %. = tail call i32 @llvm.smin.i32(i32 %0, i32 %1)
-  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
-  store i32 %., ptr %arrayidx11, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
-  br i1 %exitcond.not, label %exit, label %for.body
+  %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %., ptr %gep11, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %exit, label %loop
 
 exit:
   ret void
@@ -133,7 +133,7 @@ define void @vp_umax(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMAX:%.+]]> = call llvm.umax(ir<[[LD1]]>, ir<[[LD2]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMAX:%.+]]> = call llvm.vp.umax(ir<[[LD1]]>, ir<[[LD2]]>, vp<[[EVL]]>)
 ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]>
@@ -145,20 +145,20 @@ define void @vp_umax(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: }
 
 entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
-  %0 = load i32, ptr %arrayidx, align 4
-  %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
-  %1 = load i32, ptr %arrayidx3, align 4
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
+  %0 = load i32, ptr %gep, align 4
+  %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
+  %1 = load i32, ptr %gep3, align 4
   %. = tail call i32 @llvm.umax.i32(i32 %0, i32 %1)
-  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
-  store i32 %., ptr %arrayidx11, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
-  br i1 %exitcond.not, label %exit, label %for.body
+  %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %., ptr %gep11, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %exit, label %loop
 
 exit:
   ret void
@@ -186,7 +186,7 @@ define void @vp_umin(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMIN:%.+]]> = call llvm.umin(ir<[[LD1]]>, ir<[[LD2]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMIN:%.+]]> = call llvm.vp.umin(ir<[[LD1]]>, ir<[[LD2]]>, vp<[[EVL]]>)
 ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]>
@@ -198,20 +198,20 @@ define void @vp_umin(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: }
 
 entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
-  %0 = load i32, ptr %arrayidx, align 4
-  %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
-  %1 = load i32, ptr %arrayidx3, align 4
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
+  %0 = load i32, ptr %gep, align 4
+  %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
+  %1 = load i32, ptr %gep3, align 4
   %. = tail call i32 @llvm.umin.i32(i32 %0, i32 %1)
-  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
-  store i32 %., ptr %arrayidx11, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
-  br i1 %exitcond.not, label %exit, label %for.body
+  %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %., ptr %gep11, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %exit, label %loop
 
 exit:
   ret void
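As a companion to the VPWidenIntrinsicRecipe::execute change above, here is a self-contained sketch of how a widened vp.smax call is materialized through VectorBuilder. It assumes, as this patch does, that createSimpleIntrinsic maps a functional intrinsic ID to its VP form and appends the configured mask and EVL; the function name is illustrative only.

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/VectorBuilder.h"

using namespace llvm;

// Sketch: widen an i32 smax to @llvm.vp.smax with an all-true mask and an
// explicit vector length, mirroring the recipe's execute() path above.
static Value *emitVPSMax(IRBuilderBase &B, Value *LHS, Value *RHS, Value *EVL,
                         ElementCount VF) {
  VectorBuilder VB(B);
  // Lanes past EVL are disabled; the all-true mask enables the rest.
  VB.setMask(B.CreateVectorSplat(VF, B.getTrue())).setEVL(EVL);
  Type *RetTy = VectorType::get(B.getInt32Ty(), VF);
  Value *Ops[] = {LHS, RHS};
  // VectorBuilder resolves Intrinsic::smax to the @llvm.vp.smax declaration
  // and appends the mask and EVL operands to the call.
  return VB.createSimpleIntrinsic(Intrinsic::smax, RetTy, Ops, "vp.call");
}
```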