From 4d6e69143dc449814884ac649583d3b35bc4ae91 Mon Sep 17 00:00:00 2001 From: Haopeng Liu <153236845+haopliu@users.noreply.github.com> Date: Wed, 20 Nov 2024 19:15:23 -0800 Subject: [PATCH 001/351] Add the initializes attribute inference (#117104) reland https://github.com/llvm/llvm-project/pull/97373 after fixing clang tests. Confirmed with "ninja check-llvm" and "ninja check-clang" --- .../test/CodeGen/AArch64/pure-scalable-args.c | 6 +- ...-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c | 2 +- .../test/CodeGen/SystemZ/systemz-inline-asm.c | 2 +- .../CodeGen/SystemZ/zos-mixed-ptr-sizes.c | 14 +- clang/test/CodeGen/X86/ms-x86-intrinsics.c | 4 +- clang/test/CodeGen/arm-vfp16-arguments.c | 2 +- clang/test/CodeGen/arm-vfp16-arguments2.cpp | 10 +- clang/test/CodeGen/isfpclass.c | 2 +- .../math-libcalls-tbaa-indirect-args.c | 12 +- clang/test/CodeGen/ms-mixed-ptr-sizes.c | 10 +- .../tbaa-struct-bitfield-endianness.cpp | 2 +- clang/test/CodeGen/union-tbaa1.c | 2 +- .../amdgpu-kernel-arg-pointer-type.cu | 4 +- .../CodeGenCXX/inline-then-fold-variadics.cpp | 8 +- clang/test/CodeGenCXX/wasm-args-returns.cpp | 2 +- .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 8 +- .../test/CodeGenOpenCL/amdgpu-call-kernel.cl | 2 +- .../kernels-have-spir-cc-by-default.cl | 8 +- .../array-type-infinite-loop.clcpp | 2 +- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 329 +++++++++- .../TypeBasedAliasAnalysis/functionattrs.ll | 2 +- .../amdgpu-libcall-sincos-pass-ordering.ll | 2 +- .../AMDGPU/amdgpu-simplify-libcall-sincos.ll | 104 ++-- .../BPF/preserve-static-offset/store-zero.ll | 2 +- llvm/test/Other/optimize-inrange-gep.ll | 2 +- llvm/test/Transforms/Coroutines/coro-async.ll | 6 +- .../Transforms/FunctionAttrs/argmemonly.ll | 10 +- .../Transforms/FunctionAttrs/initializes.ll | 572 ++++++++++++++++++ .../Transforms/FunctionAttrs/readattrs.ll | 4 +- .../Transforms/FunctionAttrs/writeonly.ll | 4 +- .../PhaseOrdering/X86/unroll-vectorizer.ll | 2 +- .../Transforms/PhaseOrdering/memcpy-offset.ll | 2 +- 
llvm/test/Transforms/PhaseOrdering/pr95152.ll | 6 +- 33 files changed, 1022 insertions(+), 127 deletions(-) create mode 100644 llvm/test/Transforms/FunctionAttrs/initializes.ll diff --git a/clang/test/CodeGen/AArch64/pure-scalable-args.c b/clang/test/CodeGen/AArch64/pure-scalable-args.c index 53d5ce4e8c9d9b5..5c74447100aa8cd 100644 --- a/clang/test/CodeGen/AArch64/pure-scalable-args.c +++ b/clang/test/CodeGen/AArch64/pure-scalable-args.c @@ -292,7 +292,7 @@ PST test_return(PST *p) { return *p; } // CHECK-AAPCS: define dso_local <{ , , , , , }> @test_return(ptr -// CHECK-DARWIN: define void @test_return(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.PST) align 16 %agg.result, ptr nocapture noundef readonly %p) +// CHECK-DARWIN: define void @test_return(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.PST) align 16 initializes((0, 96)) %agg.result, ptr nocapture noundef readonly %p) // Corner case of 1-element aggregate // p->x -> q0 @@ -308,8 +308,8 @@ SmallPST test_return_small_pst(SmallPST *p) { BigPST test_return_big_pst(BigPST *p) { return *p; } -// CHECK-AAPCS: define dso_local void @test_return_big_pst(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.BigPST) align 16 %agg.result, ptr nocapture noundef readonly %p) -// CHECK-DARWIN: define void @test_return_big_pst(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.BigPST) align 16 %agg.result, ptr nocapture noundef readonly %p) +// CHECK-AAPCS: define dso_local void @test_return_big_pst(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.BigPST) align 16 initializes((0, 176)) %agg.result, ptr nocapture noundef readonly %p) +// CHECK-DARWIN: define void @test_return_big_pst(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.BigPST) align 16 initializes((0, 176)) %agg.result, ptr nocapture noundef readonly %p) // Variadic arguments are unnamed, PST passed indirectly. 
// (Passing SVE types to a variadic function currently unsupported by diff --git a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c index 55e1ed393d848bb..54e90223a31de0e 100644 --- a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c +++ b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c @@ -59,7 +59,7 @@ typedef int8_t vec_int8 __attribute__((vector_size(N / 8))); // CHECK128-NEXT: ret <16 x i8> [[CASTFIXEDSVE]] // CHECK-LABEL: define{{.*}} void @f2( -// CHECK-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret(<[[#div(VBITS,8)]] x i8>) align 16 %agg.result, ptr nocapture noundef readonly %0) +// CHECK-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret(<[[#div(VBITS,8)]] x i8>) align 16 initializes((0, [[#div(VBITS,8)]])) %agg.result, ptr nocapture noundef readonly %0) // CHECK-NEXT: entry: // CHECK-NEXT: [[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, ptr [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]] // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) diff --git a/clang/test/CodeGen/SystemZ/systemz-inline-asm.c b/clang/test/CodeGen/SystemZ/systemz-inline-asm.c index e38d37cd345e261..2a9d6a5f8745480 100644 --- a/clang/test/CodeGen/SystemZ/systemz-inline-asm.c +++ b/clang/test/CodeGen/SystemZ/systemz-inline-asm.c @@ -123,7 +123,7 @@ double test_f64(double f, double g) { long double test_f128(long double f, long double g) { asm("axbr %0, %2" : "=f" (f) : "0" (f), "f" (g)); return f; -// CHECK: define{{.*}} void @test_f128(ptr dead_on_unwind noalias nocapture writable writeonly sret(fp128) align 8 [[DEST:%.*]], ptr nocapture noundef readonly %0, ptr nocapture noundef readonly %1) +// CHECK: define{{.*}} void @test_f128(ptr dead_on_unwind noalias nocapture writable writeonly sret(fp128) align 8 initializes((0, 16)) [[DEST:%.*]], ptr nocapture noundef readonly %0, ptr 
nocapture noundef readonly %1) // CHECK: %f = load fp128, ptr %0 // CHECK: %g = load fp128, ptr %1 // CHECK: [[RESULT:%.*]] = tail call fp128 asm "axbr $0, $2", "=f,0,f"(fp128 %f, fp128 %g) diff --git a/clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes.c b/clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes.c index 6194c9b1804fb0b..82bb7a52d05d11a 100644 --- a/clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes.c +++ b/clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes.c @@ -12,42 +12,42 @@ struct Foo { void use_foo(struct Foo *f); void ptr32_to_ptr(struct Foo *f, int * __ptr32 i) { - // X64-LABEL: define void @ptr32_to_ptr(ptr noundef %f, ptr addrspace(1) noundef %i) + // X64-LABEL: define void @ptr32_to_ptr(ptr noundef initializes((8, 16)) %f, ptr addrspace(1) noundef %i) // X64: %{{.+}} = addrspacecast ptr addrspace(1) %i to ptr f->p64= i; use_foo(f); } void ptr_to_ptr32(struct Foo *f, int *i) { - // X64-LABEL: define void @ptr_to_ptr32(ptr noundef %f, ptr noundef %i) + // X64-LABEL: define void @ptr_to_ptr32(ptr noundef initializes((0, 4)) %f, ptr noundef %i) // X64: %{{.+}} = addrspacecast ptr %i to ptr addrspace(1) f->p32 = i; use_foo(f); } void ptr32_to_ptr32(struct Foo *f, int * __ptr32 i) { - // X64-LABEL: define void @ptr32_to_ptr32(ptr noundef %f, ptr addrspace(1) noundef %i) + // X64-LABEL: define void @ptr32_to_ptr32(ptr noundef initializes((0, 4)) %f, ptr addrspace(1) noundef %i) // X64-NOT: addrspacecast f->p32 = i; use_foo(f); } void ptr_to_ptr32_explicit_cast(struct Foo *f, int *i) { - // X64-LABEL: define void @ptr_to_ptr32_explicit_cast(ptr noundef %f, ptr noundef %i) + // X64-LABEL: define void @ptr_to_ptr32_explicit_cast(ptr noundef initializes((0, 4)) %f, ptr noundef %i) // X64: %{{.+}} = addrspacecast ptr %i to ptr addrspace(1) f->p32 = (int * __ptr32)i; use_foo(f); } void test_indexing(struct Foo *f) { - // X64-LABEL: define void @test_indexing(ptr noundef %f) + // X64-LABEL: define void @test_indexing(ptr noundef initializes((16, 24)) %f) // X64: 
addrspacecast ptr addrspace(1) {{%[0-9]}} to ptr f->cp64 = ((char * __ptr32 *)1028)[1]; use_foo(f); } void test_indexing_2(struct Foo *f) { - // X64-LABEL: define void @test_indexing_2(ptr noundef %f) + // X64-LABEL: define void @test_indexing_2(ptr noundef initializes((16, 24)) %f) // X64: getelementptr inbounds i8, ptr addrspace(1) {{%[0-9]}}, i32 16 // X64: getelementptr inbounds i8, ptr {{%[0-9]}}, i64 24 f->cp64 = ((char *** __ptr32 *)1028)[1][2][3]; @@ -108,7 +108,7 @@ int test_misc_4() { } void test_misc_5(struct Foo *f) { - // X64-LABEL: define void @test_misc_5(ptr noundef %f) + // X64-LABEL: define void @test_misc_5(ptr noundef initializes((16, 24)) %f) // X64: addrspacecast ptr addrspace(1) %0 to ptr f->cp64 = *(char* __ptr32 *)(PSA_PTR + PSAAOLD); use_foo(f); diff --git a/clang/test/CodeGen/X86/ms-x86-intrinsics.c b/clang/test/CodeGen/X86/ms-x86-intrinsics.c index b90e2679e26d27d..94a1b372974b38f 100644 --- a/clang/test/CodeGen/X86/ms-x86-intrinsics.c +++ b/clang/test/CodeGen/X86/ms-x86-intrinsics.c @@ -171,7 +171,7 @@ __int64 test_mul128(__int64 Multiplier, __int64 *HighProduct) { return _mul128(Multiplier, Multiplicand, HighProduct); } -// CHECK-X64-LABEL: define dso_local i64 @test_mul128(i64 noundef %Multiplier, i64 noundef %Multiplicand, ptr{{[a-z_ ]*}}%HighProduct) +// CHECK-X64-LABEL: define dso_local i64 @test_mul128(i64 noundef %Multiplier, i64 noundef %Multiplicand, ptr{{[a-z_ ]*}} initializes((0, 8)) %HighProduct) // CHECK-X64: = sext i64 %Multiplier to i128 // CHECK-X64: = sext i64 %Multiplicand to i128 // CHECK-X64: = mul nsw i128 % @@ -183,7 +183,7 @@ unsigned __int64 test_umul128(unsigned __int64 Multiplier, unsigned __int64 *HighProduct) { return _umul128(Multiplier, Multiplicand, HighProduct); } -// CHECK-X64-LABEL: define dso_local i64 @test_umul128(i64 noundef %Multiplier, i64 noundef %Multiplicand, ptr{{[a-z_ ]*}}%HighProduct) +// CHECK-X64-LABEL: define dso_local i64 @test_umul128(i64 noundef %Multiplier, i64 noundef %Multiplicand, 
ptr{{[a-z_ ]*}} initializes((0, 8)) %HighProduct) // CHECK-X64: = zext i64 %Multiplier to i128 // CHECK-X64: = zext i64 %Multiplicand to i128 // CHECK-X64: = mul nuw i128 % diff --git a/clang/test/CodeGen/arm-vfp16-arguments.c b/clang/test/CodeGen/arm-vfp16-arguments.c index da034626024f82c..3c6691df4747abb 100644 --- a/clang/test/CodeGen/arm-vfp16-arguments.c +++ b/clang/test/CodeGen/arm-vfp16-arguments.c @@ -71,6 +71,6 @@ void test_hfa(hfa_t a) {} hfa_t ghfa; hfa_t test_ret_hfa(void) { return ghfa; } -// CHECK-SOFT: define{{.*}} void @test_ret_hfa(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.hfa_t) align 8 %agg.result) +// CHECK-SOFT: define{{.*}} void @test_ret_hfa(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.hfa_t) align 8 initializes((0, 16)) %agg.result) // CHECK-HARD: define{{.*}} arm_aapcs_vfpcc [2 x <2 x i32>] @test_ret_hfa() // CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.hfa_t @test_ret_hfa() diff --git a/clang/test/CodeGen/arm-vfp16-arguments2.cpp b/clang/test/CodeGen/arm-vfp16-arguments2.cpp index b7c6852c47b7f07..b810cfd0a6648f9 100644 --- a/clang/test/CodeGen/arm-vfp16-arguments2.cpp +++ b/clang/test/CodeGen/arm-vfp16-arguments2.cpp @@ -37,27 +37,27 @@ struct S5 : B1 { B1 M[1]; }; -// CHECK-SOFT: define{{.*}} void @_Z2f12S1(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S1) align 8 %agg.result, [2 x i64] %s1.coerce) +// CHECK-SOFT: define{{.*}} void @_Z2f12S1(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S1) align 8 initializes((0, 16)) %agg.result, [2 x i64] %s1.coerce) // CHECK-HARD: define{{.*}} arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f12S1([2 x <2 x i32>] returned %s1.coerce) // CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.S1 @_Z2f12S1(%struct.S1 returned %s1.coerce) struct S1 f1(struct S1 s1) { return s1; } -// CHECK-SOFT: define{{.*}} void @_Z2f22S2(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S2) align 8 %agg.result, [4 x 
i32] %s2.coerce) +// CHECK-SOFT: define{{.*}} void @_Z2f22S2(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S2) align 8 initializes((0, 16)) %agg.result, [4 x i32] %s2.coerce) // CHECK-HARD: define{{.*}} arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f22S2([2 x <2 x i32>] returned %s2.coerce) // CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.S2 @_Z2f22S2(%struct.S2 %s2.coerce) struct S2 f2(struct S2 s2) { return s2; } -// CHECK-SOFT: define{{.*}} void @_Z2f32S3(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S3) align 8 %agg.result, [2 x i64] %s3.coerce) +// CHECK-SOFT: define{{.*}} void @_Z2f32S3(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S3) align 8 initializes((0, 16)) %agg.result, [2 x i64] %s3.coerce) // CHECK-HARD: define{{.*}} arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f32S3([2 x <2 x i32>] returned %s3.coerce) // CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.S3 @_Z2f32S3(%struct.S3 %s3.coerce) struct S3 f3(struct S3 s3) { return s3; } -// CHECK-SOFT: define{{.*}} void @_Z2f42S4(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S4) align 8 %agg.result, [2 x i64] %s4.coerce) +// CHECK-SOFT: define{{.*}} void @_Z2f42S4(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S4) align 8 initializes((0, 16)) %agg.result, [2 x i64] %s4.coerce) // CHECK-HARD: define{{.*}} arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f42S4([2 x <2 x i32>] returned %s4.coerce) // CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.S4 @_Z2f42S4(%struct.S4 %s4.coerce) struct S4 f4(struct S4 s4) { return s4; } -// CHECK-SOFT: define{{.*}} void @_Z2f52S5(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S5) align 8 %agg.result, [2 x i64] %s5.coerce) +// CHECK-SOFT: define{{.*}} void @_Z2f52S5(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S5) align 8 initializes((0, 16)) %agg.result, [2 x i64] %s5.coerce) // CHECK-HARD: define{{.*}} arm_aapcs_vfpcc %struct.S5 
@_Z2f52S5(%struct.S5 %s5.coerce) // CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.S5 @_Z2f52S5(%struct.S5 %s5.coerce) struct S5 f5(struct S5 s5) { return s5; } diff --git a/clang/test/CodeGen/isfpclass.c b/clang/test/CodeGen/isfpclass.c index a0e04eaad5929dd..1bf60b8fbca176f 100644 --- a/clang/test/CodeGen/isfpclass.c +++ b/clang/test/CodeGen/isfpclass.c @@ -160,7 +160,7 @@ int4 check_isfpclass_nan_strict_v4f32(float4 x) { } // CHECK-LABEL: define dso_local void @check_isfpclass_nan_v4f64 -// CHECK-SAME: (ptr dead_on_unwind noalias nocapture writable writeonly sret(<4 x i64>) align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly [[TMP0:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +// CHECK-SAME: (ptr dead_on_unwind noalias nocapture writable writeonly sret(<4 x i64>) align 16 initializes((0, 32)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly [[TMP0:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[X:%.*]] = load <4 x double>, ptr [[TMP0]], align 16, !tbaa [[TBAA2:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <4 x double> [[X]], zeroinitializer diff --git a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c index 8e5f015647e4141..440db83fef5daaf 100644 --- a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c +++ b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c @@ -53,7 +53,7 @@ long double powl(long double a, long double b); // CHECK-SPIR: [[CALL:%.*]] = tail call spir_func double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] // // CHECK-MINGW32-LABEL: define dso_local void @test_powl( -// CHECK-MINGW32-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret(x86_fp80) align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly [[TMP0:%.*]], ptr nocapture noundef readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-MINGW32-SAME: ptr dead_on_unwind noalias nocapture 
writable writeonly sret(x86_fp80) align 16 initializes((0, 10)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly [[TMP0:%.*]], ptr nocapture noundef readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[TBAA3:![0-9]+]] // CHECK-MINGW32: [[B:%.*]] = load x86_fp80, ptr [[TMP1]], align 16, !tbaa [[TBAA3]] // CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[TBAA3]] @@ -83,7 +83,7 @@ long double test_powl(long double a, long double b) { // CHECK-WIN64: [[CALL:%.*]] = tail call x86_fp80 @cargl(ptr noundef nonnull byval({ x86_fp80, x86_fp80 }) align 16 [[BYVAL_TEMP]]) #[[ATTR5]] // // CHECK-I686-LABEL: define dso_local void @test_cargl( -// CHECK-I686-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ x86_fp80, x86_fp80 }) align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ x86_fp80, x86_fp80 }) align 4 [[CLD:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-I686-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ x86_fp80, x86_fp80 }) align 4 initializes((0, 10), (12, 22)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ x86_fp80, x86_fp80 }) align 4 [[CLD:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-I686: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 4 // CHECK-I686: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 4 // CHECK-I686: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 4 @@ -93,7 +93,7 @@ long double test_powl(long double a, long double b) { // CHECK-I686: store x86_fp80 [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 4 // // CHECK-PPC-LABEL: define dso_local void @test_cargl( -// CHECK-PPC-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ ppc_fp128, ppc_fp128 }) align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ ppc_fp128, ppc_fp128 }) align 16 [[CLD:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// 
CHECK-PPC-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ ppc_fp128, ppc_fp128 }) align 16 initializes((0, 32)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ ppc_fp128, ppc_fp128 }) align 16 [[CLD:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-PPC: [[CLD_REAL:%.*]] = load ppc_fp128, ptr [[CLD]], align 16 // CHECK-PPC: [[CLD_IMAG:%.*]] = load ppc_fp128, ptr [[CLD_IMAGP:%.*]], align 16 // CHECK-PPC: store ppc_fp128 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16 @@ -103,7 +103,7 @@ long double test_powl(long double a, long double b) { // CHECK-PPC: store ppc_fp128 [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 16 // // CHECK-ARM-LABEL: define dso_local void @test_cargl( -// CHECK-ARM-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ double, double }) align 8 [[AGG_RESULT:%.*]], [2 x i64] noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-ARM-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ double, double }) align 8 initializes((0, 16)) [[AGG_RESULT:%.*]], [2 x i64] noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-ARM: [[CALL:%.*]] = tail call double @cargl([2 x i64] noundef [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA3]] // CHECK-ARM: store double [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 8 // CHECK-ARM: store double [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 8 @@ -121,7 +121,7 @@ long double test_powl(long double a, long double b) { // CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @cargl([2 x fp128] noundef alignstack(16) [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA2]] // // CHECK-SPIR-LABEL: define dso_local spir_func void @test_cargl( -// CHECK-SPIR-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ double, double }) align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ double, double }) align 8 [[CLD:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-SPIR-SAME: ptr dead_on_unwind noalias 
nocapture writable writeonly sret({ double, double }) align 8 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ double, double }) align 8 [[CLD:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-SPIR: [[CLD_REAL:%.*]] = load double, ptr [[CLD]], align 8 // CHECK-SPIR: [[CLD_IMAG:%.*]] = load double, ptr [[CLD_IMAGP:%.*]], align 8 // CHECK-SPIR: store double [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 8 @@ -131,7 +131,7 @@ long double test_powl(long double a, long double b) { // CHECK-SPIR: store double [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 8 // // CHECK-MINGW32-LABEL: define dso_local void @test_cargl( -// CHECK-MINGW32-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ x86_fp80, x86_fp80 }) align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly [[CLD:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-MINGW32-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ x86_fp80, x86_fp80 }) align 16 initializes((0, 10), (16, 26)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly [[CLD:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-MINGW32: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 16 // CHECK-MINGW32: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 16 // CHECK-MINGW32: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16 diff --git a/clang/test/CodeGen/ms-mixed-ptr-sizes.c b/clang/test/CodeGen/ms-mixed-ptr-sizes.c index f99c6196557e189..6ba315e165d3f10 100644 --- a/clang/test/CodeGen/ms-mixed-ptr-sizes.c +++ b/clang/test/CodeGen/ms-mixed-ptr-sizes.c @@ -9,7 +9,7 @@ struct Foo { void use_foo(struct Foo *f); void test_sign_ext(struct Foo *f, int * __ptr32 __sptr i) { // X64-LABEL: define dso_local void @test_sign_ext({{.*}}ptr addrspace(270) noundef %i) -// X86-LABEL: define dso_local void @test_sign_ext(ptr noundef %f, ptr noundef %i) +// X86-LABEL: define dso_local void @test_sign_ext(ptr noundef initializes((8, 16)) %f, ptr noundef %i) // 
AARCH64-LABEL: define dso_local void @test_sign_ext({{.*}}ptr addrspace(270) noundef %i) local_unnamed_addr #0 // X64: %{{.+}} = addrspacecast ptr addrspace(270) %i to ptr // X86: %{{.+}} = addrspacecast ptr %i to ptr addrspace(272) @@ -18,9 +18,9 @@ void test_sign_ext(struct Foo *f, int * __ptr32 __sptr i) { use_foo(f); } void test_zero_ext(struct Foo *f, int * __ptr32 __uptr i) { -// X64-LABEL: define dso_local void @test_zero_ext({{.*}}ptr addrspace(271) noundef %i) +// X64-LABEL: define dso_local void @test_zero_ext(ptr noundef initializes((8, 16)) %f, ptr addrspace(271) noundef %i) // X86-LABEL: define dso_local void @test_zero_ext({{.*}}ptr addrspace(271) noundef %i) -// AARCH64-LABEL: define dso_local void @test_zero_ext({{.*}}ptr addrspace(271) noundef %i) local_unnamed_addr #0 +// AARCH64-LABEL: define dso_local void @test_zero_ext(ptr noundef initializes((8, 16)) %f, ptr addrspace(271) noundef %i) local_unnamed_addr #0 // X64: %{{.+}} = addrspacecast ptr addrspace(271) %i to ptr // X86: %{{.+}} = addrspacecast ptr addrspace(271) %i to ptr addrspace(272) // AARCH64: %{{.+}} = addrspacecast ptr addrspace(271) %i to ptr @@ -28,9 +28,9 @@ void test_zero_ext(struct Foo *f, int * __ptr32 __uptr i) { use_foo(f); } void test_trunc(struct Foo *f, int * __ptr64 i) { -// X64-LABEL: define dso_local void @test_trunc(ptr noundef %f, ptr noundef %i) +// X64-LABEL: define dso_local void @test_trunc(ptr noundef initializes((0, 4)) %f, ptr noundef %i) // X86-LABEL: define dso_local void @test_trunc({{.*}}ptr addrspace(272) noundef %i) -// AARCH64-LABEL: define dso_local void @test_trunc(ptr noundef %f, ptr noundef %i) local_unnamed_addr #0 +// AARCH64-LABEL: define dso_local void @test_trunc(ptr noundef initializes((0, 4)) %f, ptr noundef %i) local_unnamed_addr #0 // X64: %{{.+}} = addrspacecast ptr %i to ptr addrspace(270) // X86: %{{.+}} = addrspacecast ptr addrspace(272) %i to ptr // AARCH64: %{{.+}} = addrspacecast ptr %i to ptr addrspace(270) diff --git 
a/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp b/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp index e8bb46982537bb9..7173b6e8fbe2a19 100644 --- a/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp +++ b/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp @@ -16,7 +16,7 @@ struct NamedBitfields { }; // CHECK-LABEL: _Z4copyP14NamedBitfieldsS0_ -// CHECK-SAME: ptr nocapture noundef writeonly [[A1:%.*]], ptr nocapture noundef readonly [[A2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-SAME: ptr nocapture noundef writeonly initializes((0, 16)) [[A1:%.*]], ptr nocapture noundef readonly [[A2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) [[A1]], ptr noundef nonnull align 8 dereferenceable(16) [[A2]], i64 16, i1 false), !tbaa.struct [[TBAA_STRUCT2:![0-9]+]] // CHECK-NEXT: ret void diff --git a/clang/test/CodeGen/union-tbaa1.c b/clang/test/CodeGen/union-tbaa1.c index a5faa8269aed698..5263b1714c8c6ed 100644 --- a/clang/test/CodeGen/union-tbaa1.c +++ b/clang/test/CodeGen/union-tbaa1.c @@ -8,7 +8,7 @@ typedef union __attribute__((aligned(4))) { void bar(vect32 p[][2]); // CHECK-LABEL: define dso_local void @fred -// CHECK-SAME: (i32 noundef [[NUM:%.*]], ptr nocapture noundef writeonly [[VEC:%.*]], ptr nocapture noundef readonly [[INDEX:%.*]], ptr nocapture noundef readonly [[ARR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-SAME: (i32 noundef [[NUM:%.*]], ptr nocapture noundef writeonly initializes((0, 8)) [[VEC:%.*]], ptr nocapture noundef readonly [[INDEX:%.*]], ptr nocapture noundef readonly [[ARR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP:%.*]] = alloca [4 x [2 x %union.vect32]], align 8 // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[TMP]]) #[[ATTR3:[0-9]+]] diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu 
b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu index b295bbbdaaf955a..838bdda82572867 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -188,14 +188,14 @@ __global__ void kernel2(int &x) { // CHECK-SPIRV-NEXT: ret void // // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i( -// OPT-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// OPT-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { // OPT-NEXT: [[ENTRY:.*:]] // OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4 // OPT-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4 // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i( -// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] { +// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4 // OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4 diff --git a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp index a0673b96626d15e..4aa79a28dd7d3d0 100644 --- a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp +++ b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp @@ -108,7 +108,7 @@ typedef uint64_t ulong2 __attribute__((__vector_size__(16), __aligned__(16))); int first_i32_ulong2(int x, ulong2 *y) { return first(x, 
*y); } // CHECK-LABEL: define {{[^@]+}}@second_i32_ulong2 -// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly initializes((0, 16)) [[R:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]] // CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]] @@ -119,7 +119,7 @@ void second_i32_ulong2(int x, ulong2 *y, ulong2 *r) { } // CHECK-LABEL: define {{[^@]+}}@first_ulong2_i32 -// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly initializes((0, 16)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X]], align 16, !tbaa [[TBAA2]] // CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]] @@ -157,7 +157,7 @@ extern "C" { int first_i32_asc(int x, asc *y) { return first(x, *y); } // CHECK-LABEL: define {{[^@]+}}@second_i32_asc -// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly initializes((0, 24)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[Y]], i32 24, i1 false) // CHECK-NEXT: ret void @@ -165,7 +165,7 @@ int first_i32_asc(int x, asc *y) { 
return first(x, *y); } void second_i32_asc(int x, asc *y, asc *r) { *r = second(x, *y); } // CHECK-LABEL: define {{[^@]+}}@first_asc_i32 -// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly initializes((0, 24)) [[R:%.*]]) local_unnamed_addr #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[X]], i32 24, i1 false) // CHECK-NEXT: ret void diff --git a/clang/test/CodeGenCXX/wasm-args-returns.cpp b/clang/test/CodeGenCXX/wasm-args-returns.cpp index e80dfefedece168..b57896b0e0ffe77 100644 --- a/clang/test/CodeGenCXX/wasm-args-returns.cpp +++ b/clang/test/CodeGenCXX/wasm-args-returns.cpp @@ -30,7 +30,7 @@ struct two_fields { double d, e; }; test(two_fields); -// CHECK: define void @_Z7forward10two_fields(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.two_fields) align 8 %{{.*}}, ptr nocapture readonly byval(%struct.two_fields) align 8 %{{.*}}) +// CHECK: define void @_Z7forward10two_fields(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.two_fields) align 8 initializes((0, 16)) %{{.*}}, ptr nocapture readonly byval(%struct.two_fields) align 8 %{{.*}}) // // CHECK: define void @_Z15test_two_fieldsv() // CHECK: %[[tmp:.*]] = alloca %struct.two_fields, align 8 diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl index 5cb8af6fc6df9c8..35a08a90d8cf907 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -402,14 +402,14 @@ struct_arr16 func_ret_struct_arr16() return s; } -// CHECK: define{{.*}} void @func_ret_struct_arr32(ptr addrspace(5) 
dead_on_unwind noalias nocapture writable writeonly sret(%struct.struct_arr32) align 4 %agg.result) +// CHECK: define{{.*}} void @func_ret_struct_arr32(ptr addrspace(5) dead_on_unwind noalias nocapture writable writeonly sret(%struct.struct_arr32) align 4 initializes((0, 128)) %agg.result) struct_arr32 func_ret_struct_arr32() { struct_arr32 s = { 0 }; return s; } -// CHECK: define{{.*}} void @func_ret_struct_arr33(ptr addrspace(5) dead_on_unwind noalias nocapture writable writeonly sret(%struct.struct_arr33) align 4 %agg.result) +// CHECK: define{{.*}} void @func_ret_struct_arr33(ptr addrspace(5) dead_on_unwind noalias nocapture writable writeonly sret(%struct.struct_arr33) align 4 initializes((0, 132)) %agg.result) struct_arr33 func_ret_struct_arr33() { struct_arr33 s = { 0 }; @@ -438,7 +438,7 @@ different_size_type_pair func_different_size_type_pair_ret() return s; } -// CHECK: define{{.*}} void @func_flexible_array_ret(ptr addrspace(5) dead_on_unwind noalias nocapture writable writeonly sret(%struct.flexible_array) align 4 %agg.result) +// CHECK: define{{.*}} void @func_flexible_array_ret(ptr addrspace(5) dead_on_unwind noalias nocapture writable writeonly sret(%struct.flexible_array) align 4 initializes((0, 4)) %agg.result) flexible_array func_flexible_array_ret() { flexible_array s = { 0 }; @@ -467,7 +467,7 @@ double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) { // CHECK: define{{.*}} void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14) void func_large_struct_padding_arg_direct(large_struct_padding arg) { } -// CHECK: define{{.*}} void @func_large_struct_padding_arg_store(ptr addrspace(1) nocapture noundef writeonly %out, ptr addrspace(5) nocapture noundef readonly 
byref(%struct.large_struct_padding) align 8 %{{.*}}) +// CHECK: define{{.*}} void @func_large_struct_padding_arg_store(ptr addrspace(1) nocapture noundef writeonly initializes((0, 56)) %out, ptr addrspace(5) nocapture noundef readonly byref(%struct.large_struct_padding) align 8 %{{.*}}) void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) { *out = arg; } diff --git a/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl index e4678abf3350099..dfca09d034cdb1d 100755 --- a/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl @@ -1,6 +1,6 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | FileCheck %s -// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(ptr addrspace(1) nocapture noundef writeonly align 4 %out) +// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(ptr addrspace(1) nocapture noundef writeonly align 4 initializes((0, 4)) %out) // CHECK: store i32 4, ptr addrspace(1) %out, align 4 kernel void test_kernel(global int *out) diff --git a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl index f39589ada0a7034..2aeeb637795a9df 100644 --- a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl +++ b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl @@ -28,7 +28,7 @@ kernel void test_single(int_single input, global int* output) { // CHECK: spir_kernel // AMDGCN: define{{.*}} amdgpu_kernel void @test_single // CHECK: ptr nocapture {{.*}} byval(%struct.int_single) -// CHECK: ptr nocapture noundef writeonly align 4 %output +// CHECK: ptr nocapture noundef writeonly align 4 initializes((0, 4)) %output output[0] = input.a; } @@ -36,7 +36,7 @@ kernel void test_pair(int_pair input, global int* output) { // CHECK: spir_kernel // AMDGCN: define{{.*}} amdgpu_kernel void 
@test_pair // CHECK: ptr nocapture {{.*}} byval(%struct.int_pair) -// CHECK: ptr nocapture noundef writeonly align 4 %output +// CHECK: ptr nocapture noundef writeonly align 4 initializes((0, 8)) %output output[0] = (int)input.a; output[1] = (int)input.b; } @@ -45,7 +45,7 @@ kernel void test_kernel(test_struct input, global int* output) { // CHECK: spir_kernel // AMDGCN: define{{.*}} amdgpu_kernel void @test_kernel // CHECK: ptr nocapture {{.*}} byval(%struct.test_struct) -// CHECK: ptr nocapture noundef writeonly align 4 %output +// CHECK: ptr nocapture noundef writeonly align 4 initializes((0, 32)) %output output[0] = input.elementA; output[1] = input.elementB; output[2] = (int)input.elementC; @@ -59,7 +59,7 @@ kernel void test_kernel(test_struct input, global int* output) { void test_function(int_pair input, global int* output) { // CHECK-NOT: spir_kernel // AMDGCN-NOT: define{{.*}} amdgpu_kernel void @test_function -// CHECK: i64 %input.coerce0, i64 %input.coerce1, ptr nocapture noundef writeonly %output +// CHECK: i64 %input.coerce0, i64 %input.coerce1, ptr nocapture noundef writeonly initializes((0, 8)) %output output[0] = (int)input.a; output[1] = (int)input.b; } diff --git a/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp b/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp index db9d7eb3281fce2..86f21ee556ce830 100644 --- a/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp +++ b/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp @@ -2,7 +2,7 @@ //RUN: %clang_cc1 %s -triple spir -emit-llvm -O1 -o - | FileCheck %s // CHECK-LABEL: define dso_local spir_kernel void @test( -// CHECK-SAME: ptr addrspace(1) nocapture noundef readonly align 8 [[IN:%.*]], ptr addrspace(1) nocapture noundef writeonly align 8 [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual 
[[META6:![0-9]+]] { +// CHECK-SAME: ptr addrspace(1) nocapture noundef readonly align 8 [[IN:%.*]], ptr addrspace(1) nocapture noundef writeonly align 8 initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[IN]], i32 8 // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(1) [[ARRAYIDX1]], align 8, !tbaa [[TBAA7:![0-9]+]] diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 79746201133bddd..afb0ea72b269c8f 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -36,6 +37,7 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRangeList.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" @@ -581,6 +583,200 @@ struct ArgumentUsesTracker : public CaptureTracker { const SCCNodeSet &SCCNodes; }; +/// A struct of argument use: a Use and the offset it accesses. This struct +/// is to track uses inside function via GEP. If GEP has a non-constant index, +/// the Offset field is nullopt. +struct ArgumentUse { + Use *U; + std::optional Offset; +}; + +/// A struct of argument access info. "Unknown" accesses are the cases like +/// unrecognized instructions, instructions that have more than one use of +/// the argument, or volatile memory accesses. 
"WriteWithSideEffect" are call +/// instructions that not only write an argument but also capture it. +struct ArgumentAccessInfo { + enum class AccessType : uint8_t { Write, WriteWithSideEffect, Read, Unknown }; + AccessType ArgAccessType; + ConstantRangeList AccessRanges; +}; + +/// A struct to wrap the argument use info per block. +struct UsesPerBlockInfo { + SmallDenseMap Insts; + bool HasWrites = false; + bool HasUnknownAccess = false; +}; + +/// A struct to summarize the argument use info in a function. +struct ArgumentUsesSummary { + bool HasAnyWrite = false; + bool HasWriteOutsideEntryBB = false; + SmallDenseMap UsesPerBlock; +}; + +ArgumentAccessInfo getArgmentAccessInfo(const Instruction *I, + const ArgumentUse &ArgUse, + const DataLayout &DL) { + auto GetTypeAccessRange = + [&DL](Type *Ty, + std::optional Offset) -> std::optional { + auto TypeSize = DL.getTypeStoreSize(Ty); + if (!TypeSize.isScalable() && Offset) { + int64_t Size = TypeSize.getFixedValue(); + return ConstantRange(APInt(64, *Offset, true), + APInt(64, *Offset + Size, true)); + } + return std::nullopt; + }; + auto GetConstantIntRange = + [](Value *Length, + std::optional Offset) -> std::optional { + auto *ConstantLength = dyn_cast(Length); + if (ConstantLength && Offset) + return ConstantRange( + APInt(64, *Offset, true), + APInt(64, *Offset + ConstantLength->getSExtValue(), true)); + return std::nullopt; + }; + if (auto *SI = dyn_cast(I)) { + if (SI->isSimple() && &SI->getOperandUse(1) == ArgUse.U) { + // Get the fixed type size of "SI". Since the access range of a write + // will be unioned, if "SI" doesn't have a fixed type size, we just set + // the access range to empty. 
+ ConstantRangeList AccessRanges; + if (auto TypeAccessRange = + GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset)) + AccessRanges.insert(*TypeAccessRange); + return {ArgumentAccessInfo::AccessType::Write, std::move(AccessRanges)}; + } + } else if (auto *LI = dyn_cast(I)) { + if (LI->isSimple()) { + assert(&LI->getOperandUse(0) == ArgUse.U); + // Get the fixed type size of "LI". Different from Write, if "LI" + // doesn't have a fixed type size, we conservatively set as a clobber + // with an empty access range. + if (auto TypeAccessRange = + GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset)) + return {ArgumentAccessInfo::AccessType::Read, {*TypeAccessRange}}; + } + } else if (auto *MemSet = dyn_cast(I)) { + if (!MemSet->isVolatile()) { + ConstantRangeList AccessRanges; + if (auto AccessRange = + GetConstantIntRange(MemSet->getLength(), ArgUse.Offset)) + AccessRanges.insert(*AccessRange); + return {ArgumentAccessInfo::AccessType::Write, AccessRanges}; + } + } else if (auto *MTI = dyn_cast(I)) { + if (!MTI->isVolatile()) { + if (&MTI->getOperandUse(0) == ArgUse.U) { + ConstantRangeList AccessRanges; + if (auto AccessRange = + GetConstantIntRange(MTI->getLength(), ArgUse.Offset)) + AccessRanges.insert(*AccessRange); + return {ArgumentAccessInfo::AccessType::Write, AccessRanges}; + } else if (&MTI->getOperandUse(1) == ArgUse.U) { + if (auto AccessRange = + GetConstantIntRange(MTI->getLength(), ArgUse.Offset)) + return {ArgumentAccessInfo::AccessType::Read, {*AccessRange}}; + } + } + } else if (auto *CB = dyn_cast(I)) { + if (CB->isArgOperand(ArgUse.U)) { + unsigned ArgNo = CB->getArgOperandNo(ArgUse.U); + bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes); + // Argument is a Write when parameter is writeonly/readnone + // and nocapture. Otherwise, it's a WriteWithSideEffect. + auto Access = CB->onlyWritesMemory(ArgNo) && + CB->paramHasAttr(ArgNo, Attribute::NoCapture) + ? 
ArgumentAccessInfo::AccessType::Write + : ArgumentAccessInfo::AccessType::WriteWithSideEffect; + ConstantRangeList AccessRanges; + if (IsInitialize && ArgUse.Offset) { + Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes); + ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList(); + for (ConstantRange &CR : CBCRL) + AccessRanges.insert(ConstantRange(CR.getLower() + *ArgUse.Offset, + CR.getUpper() + *ArgUse.Offset)); + return {Access, AccessRanges}; + } + } + } + // Other unrecognized instructions are considered as unknown. + return {ArgumentAccessInfo::AccessType::Unknown, {}}; +} + +// Collect the uses of argument "A" in "F". +ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) { + auto &DL = F.getParent()->getDataLayout(); + unsigned PointerSize = + DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace()); + ArgumentUsesSummary Result; + + BasicBlock &EntryBB = F.getEntryBlock(); + SmallVector Worklist; + for (Use &U : A.uses()) + Worklist.push_back({&U, 0}); + + // Update "UsesPerBlock" with the block of "I" as key and "Info" as value. + // Return true if the block of "I" has write accesses after updating. + auto UpdateUseInfo = [&Result](Instruction *I, ArgumentAccessInfo Info) { + auto *BB = I->getParent(); + auto &BBInfo = Result.UsesPerBlock[BB]; + bool AlreadyVisitedInst = BBInfo.Insts.contains(I); + auto &IInfo = BBInfo.Insts[I]; + + // Instructions that have more than one use of the argument are considered + // as clobbers. 
+ if (AlreadyVisitedInst) { + IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}}; + BBInfo.HasUnknownAccess = true; + return false; + } + + IInfo = std::move(Info); + BBInfo.HasUnknownAccess |= + IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown; + bool InfoHasWrites = + (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write || + IInfo.ArgAccessType == + ArgumentAccessInfo::AccessType::WriteWithSideEffect) && + !IInfo.AccessRanges.empty(); + BBInfo.HasWrites |= InfoHasWrites; + return InfoHasWrites; + }; + + // No need for a visited set because we don't look through phis, so there are + // no cycles. + while (!Worklist.empty()) { + ArgumentUse ArgUse = Worklist.pop_back_val(); + User *U = ArgUse.U->getUser(); + // Add GEP uses to worklist. + // If the GEP is not a constant GEP, set the ArgumentUse::Offset to nullopt. + if (auto *GEP = dyn_cast(U)) { + std::optional NewOffset = std::nullopt; + if (ArgUse.Offset) { + APInt Offset(PointerSize, 0); + if (GEP->accumulateConstantOffset(DL, Offset)) + NewOffset = *ArgUse.Offset + Offset.getSExtValue(); + } + for (Use &U : GEP->uses()) + Worklist.push_back({&U, NewOffset}); + continue; + } + + auto *I = cast(U); + bool HasWrite = UpdateUseInfo(I, getArgmentAccessInfo(I, ArgUse, DL)); + + Result.HasAnyWrite |= HasWrite; + + if (HasWrite && I->getParent() != &EntryBB) + Result.HasWriteOutsideEntryBB = true; + } + return Result; +} + } // end anonymous namespace namespace llvm { @@ -867,9 +1063,129 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) { return true; } +static bool inferInitializes(Argument &A, Function &F) { + auto ArgumentUses = collectArgumentUsesPerBlock(A, F); + // No write anywhere in the function, bail. + if (!ArgumentUses.HasAnyWrite) + return false; + + auto &UsesPerBlock = ArgumentUses.UsesPerBlock; + BasicBlock &EntryBB = F.getEntryBlock(); + // A map to store the argument ranges initialized by a BasicBlock (including + // its successors). 
+ DenseMap Initialized; + // Visit the successors of "BB" block and the instructions in BB (post-order) + // to get the argument ranges initialized by "BB" (including its successors). + // The result will be cached in "Initialized". + auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList { + auto UPB = UsesPerBlock.find(BB); + ConstantRangeList CRL; + + // Start with intersection of successors. + // If this block has any clobbering use, we're going to clear out the + // ranges at some point in this block anyway, so don't bother looking at + // successors. + if (UPB == UsesPerBlock.end() || !UPB->second.HasUnknownAccess) { + bool HasAddedSuccessor = false; + for (auto *Succ : successors(BB)) { + if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) { + if (HasAddedSuccessor) { + CRL = CRL.intersectWith(SuccI->second); + } else { + CRL = SuccI->second; + HasAddedSuccessor = true; + } + } else { + CRL = ConstantRangeList(); + break; + } + } + } + + if (UPB != UsesPerBlock.end()) { + // Sort uses in this block by instruction order. + SmallVector, 2> Insts; + append_range(Insts, UPB->second.Insts); + sort(Insts, [](std::pair &LHS, + std::pair &RHS) { + return LHS.first->comesBefore(RHS.first); + }); + + // From the end of the block to the beginning of the block, set + // initializes ranges. 
+ for (auto &[_, Info] : reverse(Insts)) { + if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown || + Info.ArgAccessType == + ArgumentAccessInfo::AccessType::WriteWithSideEffect) + CRL = ConstantRangeList(); + if (!Info.AccessRanges.empty()) { + if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write || + Info.ArgAccessType == + ArgumentAccessInfo::AccessType::WriteWithSideEffect) { + CRL = CRL.unionWith(Info.AccessRanges); + } else { + assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read); + for (const auto &ReadRange : Info.AccessRanges) + CRL.subtract(ReadRange); + } + } + } + } + return CRL; + }; + + ConstantRangeList EntryCRL; + // If all write instructions are in the EntryBB, or if the EntryBB has + // a clobbering use, we only need to look at EntryBB. + bool OnlyScanEntryBlock = !ArgumentUses.HasWriteOutsideEntryBB; + if (!OnlyScanEntryBlock) + if (auto EntryUPB = UsesPerBlock.find(&EntryBB); + EntryUPB != UsesPerBlock.end()) + OnlyScanEntryBlock = EntryUPB->second.HasUnknownAccess; + if (OnlyScanEntryBlock) { + EntryCRL = VisitBlock(&EntryBB); + if (EntryCRL.empty()) + return false; + } else { + // Now we have to go through CFG to get the initialized argument ranges + // across blocks. With dominance and post-dominance, the initialized ranges + // by a block include both accesses inside this block and accesses in its + // (transitive) successors. So visit successors before predecessors with a + // post-order walk of the blocks and memorize the results in "Initialized". 
+ for (const BasicBlock *BB : post_order(&F)) { + ConstantRangeList CRL = VisitBlock(BB); + if (!CRL.empty()) + Initialized[BB] = CRL; + } + + auto EntryCRLI = Initialized.find(&EntryBB); + if (EntryCRLI == Initialized.end()) + return false; + + EntryCRL = EntryCRLI->second; + } + + assert(!EntryCRL.empty() && + "should have bailed already if EntryCRL is empty"); + + if (A.hasAttribute(Attribute::Initializes)) { + ConstantRangeList PreviousCRL = + A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList(); + if (PreviousCRL == EntryCRL) + return false; + EntryCRL = EntryCRL.unionWith(PreviousCRL); + } + + A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes, + EntryCRL.rangesRef())); + + return true; +} + /// Deduce nocapture attributes for the SCC. static void addArgumentAttrs(const SCCNodeSet &SCCNodes, - SmallSet &Changed) { + SmallSet &Changed, + bool SkipInitializes) { ArgumentGraph AG; // Check each function in turn, determining which pointer arguments are not @@ -937,6 +1253,10 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes, if (addAccessAttr(&A, R)) Changed.insert(F); } + if (!SkipInitializes && !A.onlyReadsMemory()) { + if (inferInitializes(A, *F)) + Changed.insert(F); + } } } @@ -1910,13 +2230,16 @@ deriveAttrsInPostOrder(ArrayRef Functions, AARGetterT &&AARGetter, SmallSet Changed; if (ArgAttrsOnly) { - addArgumentAttrs(Nodes.SCCNodes, Changed); + // ArgAttrsOnly means to only infer attributes that may aid optimizations + // on the *current* function. "initializes" attribute is to aid + // optimizations (like DSE) on the callers, so skip "initializes" here. 
+ addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true); return Changed; } addArgumentReturnedAttrs(Nodes.SCCNodes, Changed); addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed); - addArgumentAttrs(Nodes.SCCNodes, Changed); + addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false); inferConvergent(Nodes.SCCNodes, Changed); addNoReturnAttrs(Nodes.SCCNodes, Changed); addColdAttrs(Nodes.SCCNodes, Changed); diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll index bea56a72bdeaef1..8615363a985d111 100644 --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll @@ -15,7 +15,7 @@ define void @test0_yes(ptr %p) nounwind { ret void } -; CHECK: define void @test0_no(ptr nocapture writeonly %p) #1 { +; CHECK: define void @test0_no(ptr nocapture writeonly initializes((0, 4)) %p) #1 { define void @test0_no(ptr %p) nounwind { store i32 0, ptr %p, !tbaa !2 ret void diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll index 6b835bb4eef662e..317a069eed26e9d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll @@ -10,7 +10,7 @@ ; Should have call to sincos declarations, not calls to the asm pseudo-libcalls define protected amdgpu_kernel void @swdev456865(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, float noundef %x) #0 { ; CHECK-LABEL: define protected amdgpu_kernel void @swdev456865( -; CHECK-SAME: ptr addrspace(1) nocapture writeonly [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT0:%.*]], 
ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[I_I:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) #[[ATTR1:[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll index 1358d91ae102c9c..07587eaacd7034d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll @@ -49,7 +49,7 @@ declare float @_Z6sincosfPU3AS0f(float %x, ptr writeonly %ptr) #1 define void @sincos_f16_nocontract(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f16_nocontract -; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call half @_Z3sinDh(half [[X]]) ; CHECK-NEXT: store half [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 2 @@ -68,7 +68,7 @@ entry: define void @sincos_v2f16_nocontract(<2 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f16_nocontract -; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<2 x half> [[X:%.*]], 
ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x half> @_Z3sinDv2_Dh(<2 x half> [[X]]) ; CHECK-NEXT: store <2 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 @@ -87,7 +87,7 @@ entry: define void @sincos_f16(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f16 -; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract half @_Z3sinDh(half [[X]]) ; CHECK-NEXT: store half [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 2 @@ -105,7 +105,7 @@ entry: define void @sincos_f16_order1(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f16_order1 -; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL1:%.*]] = tail call contract half @_Z3cosDh(half [[X]]) ; CHECK-NEXT: store half [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 2 @@ -123,7 +123,7 @@ entry: define void @sincos_v2f16(<2 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr 
addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f16 -; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract <2 x half> @_Z3sinDv2_Dh(<2 x half> [[X]]) ; CHECK-NEXT: store <2 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 @@ -141,7 +141,7 @@ entry: define void @sincos_v3f16(<3 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v3f16 -; CHECK-SAME: (<3 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<3 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract <3 x half> @_Z3sinDv3_Dh(<3 x half> [[X]]) ; CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <3 x half> [[CALL]], <3 x half> poison, <4 x i32> @@ -164,7 +164,7 @@ entry: define void @sincos_v4f16(<4 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v4f16 -; CHECK-SAME: (<4 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<4 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) 
nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract <4 x half> @_Z3sinDv4_Dh(<4 x half> [[X]]) ; CHECK-NEXT: store <4 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8 @@ -182,7 +182,7 @@ entry: define void @sincos_v8f16(<8 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v8f16 -; CHECK-SAME: (<8 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<8 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract <8 x half> @_Z3sinDv8_Dh(<8 x half> [[X]]) ; CHECK-NEXT: store <8 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 16 @@ -201,7 +201,7 @@ entry: define void @sincos_v16f16(<16 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v16f16 -; CHECK-SAME: (<16 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<16 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract <16 x half> @_Z3sinDv16_Dh(<16 x half> [[X]]) ; CHECK-NEXT: store <16 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 32 @@ -220,7 +220,7 @@ entry: define void @sincos_f32_nocontract(float %x, ptr addrspace(1) nocapture writeonly %sin_out, 
ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_nocontract -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -240,7 +240,7 @@ entry: define void @sincos_v2f32_nocontract(<2 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f32_nocontract -; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { +; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call <2 x float> @_Z6sincosDv2_fPU3AS5S_(<2 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -260,7 +260,7 @@ entry: define void @sincos_f32(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly 
initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -279,7 +279,7 @@ entry: define void @sincos_f32_order1(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_order1 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -298,7 +298,7 @@ entry: define void @sincos_v2f32(<2 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f32 -; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS5S_(<2 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -317,7 +317,7 @@ 
entry: define void @sincos_v3f32(<3 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v3f32 -; CHECK-SAME: (<3 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (<3 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <3 x float>, align 16, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <3 x float> @_Z6sincosDv3_fPU3AS5S_(<3 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -340,7 +340,7 @@ entry: define void @sincos_v4f32(<4 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v4f32 -; CHECK-SAME: (<4 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (<4 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <4 x float>, align 16, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <4 x float> @_Z6sincosDv4_fPU3AS5S_(<4 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -359,7 +359,7 @@ entry: define void @sincos_v8f32(<8 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v8f32 -; CHECK-SAME: (<8 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly 
[[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (<8 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <8 x float>, align 32, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <8 x float> @_Z6sincosDv8_fPU3AS5S_(<8 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -378,7 +378,7 @@ entry: define void @sincos_v16f32(<16 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v16f32 -; CHECK-SAME: (<16 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (<16 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 64)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 64)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <16 x float>, align 64, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <16 x float> @_Z6sincosDv16_fPU3AS5S_(<16 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -397,7 +397,7 @@ entry: define void @sincos_f64_nocontract(double %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f64_nocontract -; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: 
[[__SINCOS_:%.*]] = alloca double, align 8, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call double @_Z6sincosdPU3AS5d(double [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -417,7 +417,7 @@ entry: define void @sincos_v2f64_nocontract(<2 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f64_nocontract -; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x double>, align 16, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @_Z6sincosDv2_dPU3AS5S_(<2 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -436,7 +436,7 @@ entry: define void @sincos_f64(double %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f64 -; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca double, align 8, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract double @_Z6sincosdPU3AS5d(double [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -455,7 +455,7 @@ entry: define void @sincos_f64_order1(double %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void 
@sincos_f64_order1 -; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca double, align 8, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract double @_Z6sincosdPU3AS5d(double [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -474,7 +474,7 @@ entry: define void @sincos_v2f64(<2 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f64 -; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 16)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x double>, align 16, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <2 x double> @_Z6sincosDv2_dPU3AS5S_(<2 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -493,7 +493,7 @@ entry: define void @sincos_v3f64(<3 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v3f64 -; CHECK-SAME: (<3 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (<3 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly 
initializes((0, 32)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <3 x double>, align 32, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <3 x double> @_Z6sincosDv3_dPU3AS5S_(<3 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -516,7 +516,7 @@ entry: define void @sincos_v4f64(<4 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v4f64 -; CHECK-SAME: (<4 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (<4 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 32)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <4 x double>, align 32, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <4 x double> @_Z6sincosDv4_dPU3AS5S_(<4 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -535,7 +535,7 @@ entry: define void @sincos_v8f64(<8 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v8f64 -; CHECK-SAME: (<8 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (<8 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 64)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 64)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <8 x double>, align 64, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <8 x double> @_Z6sincosDv8_dPU3AS5S_(<8 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -554,7 +554,7 
@@ entry: define void @sincos_v16f64(<16 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v16f64 -; CHECK-SAME: (<16 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (<16 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 128)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 128)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <16 x double>, align 128, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract <16 x double> @_Z6sincosDv16_dPU3AS5S_(<16 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -607,7 +607,7 @@ bb1: ; The sin and cos are in different blocks but always execute define void @sincos_f32_different_blocks_dominating_always_execute(i1 %cond, float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr addrspace(1) %other) { ; CHECK-LABEL: define void @sincos_f32_different_blocks_dominating_always_execute -; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[OTHER:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[OTHER:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -639,7 +639,7 @@ bb1: ; sin dominates cos but cos doesn't 
always execute. define void @sincos_f32_different_blocks_dominating_conditional_execute(i1 %cond, float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr addrspace(1) %other) { ; CHECK-LABEL: define void @sincos_f32_different_blocks_dominating_conditional_execute -; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr addrspace(1) nocapture readnone [[OTHER:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]], ptr addrspace(1) nocapture readnone [[OTHER:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -685,7 +685,7 @@ declare void @func(ptr addrspace(1)) define void @sincos_f32_value_is_instr(ptr addrspace(1) %value.ptr, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_value_is_instr -; CHECK-SAME: (ptr addrspace(1) [[VALUE_PTR:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (ptr addrspace(1) [[VALUE_PTR:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: tail call void @func(ptr addrspace(1) [[VALUE_PTR]]) @@ -708,7 +708,7 @@ entry: define void @sincos_f32_value_is_same_constexpr(ptr addrspace(1) 
nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_value_is_same_constexpr -; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float bitcast (i32 ptrtoint (ptr @func to i32) to float), ptr addrspace(5) [[__SINCOS_]]) @@ -727,7 +727,7 @@ entry: define void @sincos_f32_value_is_different_constexpr(ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_value_is_different_constexpr -; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) #[[ATTR2]] { +; CHECK-SAME: (ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float bitcast (i32 ptrtoint (ptr @func to i32) to float)) ; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 @@ -745,7 +745,7 @@ entry: define void @sincos_f32_value_is_same_constantfp(ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_value_is_same_constantfp -; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], 
ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float 4.200000e+01, ptr addrspace(5) [[__SINCOS_]]) @@ -764,7 +764,7 @@ entry: define void @sincos_f32_value_is_different_constantfp(ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_value_is_different_constantfp -; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float 4.200000e+01) ; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 @@ -782,7 +782,7 @@ entry: define void @sincos_f32_different_args(float %x, float %y, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_different_args -; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]) ; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 @@ -800,7 +800,7 @@ entry: define void @sincos_f32_flag_intersect0(float %x, 
ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_flag_intersect0 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -819,7 +819,7 @@ entry: define void @sincos_f32_flag_intersect1(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_flag_intersect1 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call nnan contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -838,7 +838,7 @@ entry: define void @sincos_v2f32_flag_intersect1(<2 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f32_flag_intersect1 -; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (<2 
x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call nnan contract <2 x float> @_Z6sincosDv2_fPU3AS5S_(<2 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -947,7 +947,7 @@ entry: define void @sin_f32_indirect_call_user(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr %func.ptr) { ; CHECK-LABEL: define void @sin_f32_indirect_call_user -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr nocapture readonly [[FUNC_PTR:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]], ptr nocapture readonly [[FUNC_PTR:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]) ; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 @@ -965,7 +965,7 @@ entry: define void @cos_f32_indirect_call_user(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr %func.ptr) { ; CHECK-LABEL: define void @cos_f32_indirect_call_user -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr nocapture readonly [[FUNC_PTR:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]], ptr nocapture readonly [[FUNC_PTR:%.*]]) 
local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3cosf(float [[X]]) ; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[COS_OUT]], align 4 @@ -983,7 +983,7 @@ entry: define void @sincos_f32_preserve_fpmath_0(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_preserve_fpmath_0 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]), !fpmath [[META5:![0-9]+]] @@ -1002,7 +1002,7 @@ entry: define void @sincos_f32_preserve_fpmath_1(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_preserve_fpmath_1 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]), !fpmath [[META6:![0-9]+]] @@ -1022,7 +1022,7 @@ entry: ; Should drop the metadata define void 
@sincos_f32_drop_fpmath(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_drop_fpmath -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) @@ -1041,7 +1041,7 @@ entry: define void @sincos_f32_debuginfo(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) !dbg !15 { ; CHECK-LABEL: define void @sincos_f32_debuginfo -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] !dbg [[DBG7:![0-9]+]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] !dbg [[DBG7:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5), !dbg [[DBG14:![0-9]+]] ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]), !dbg [[DBG14]] @@ -1064,7 +1064,7 @@ entry: define float @sin_sincos_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sin_sincos_private_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) 
local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[SIN0:%.*]] = tail call nnan ninf nsz contract float @_Z3sinf(float [[X]]), !fpmath [[META5]] @@ -1086,7 +1086,7 @@ entry: define float @sin_sincos_generic_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sin_sincos_generic_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[SIN0:%.*]] = tail call nsz contract float @_Z3sinf(float [[X]]), !fpmath [[META5]] @@ -1110,7 +1110,7 @@ entry: define float @cos_sincos_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @cos_sincos_private_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS0:%.*]] = tail call contract float @_Z3cosf(float [[X]]) @@ -1132,7 +1132,7 @@ entry: define float @cos_sincos_generic_f32(float %x, ptr 
addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @cos_sincos_generic_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS0:%.*]] = tail call contract float @_Z3cosf(float [[X]]) @@ -1156,7 +1156,7 @@ entry: define float @sincos_private_f32_x2(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sincos_private_f32_x2 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5) @@ -1184,7 +1184,7 @@ entry: define float @sincos_generic_f32_x2(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sincos_generic_f32_x2 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: 
[[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5) @@ -1213,7 +1213,7 @@ entry: define float @sincos_generic_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sincos_generic_private_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5) @@ -1240,7 +1240,7 @@ entry: define float @sincos_mixed_sin_cos_generic_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sincos_mixed_sin_cos_generic_private_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) diff --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-zero.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-zero.ll index 7f2a06af8d10f91..d3929a3706ba859 100644 --- a/llvm/test/CodeGen/BPF/preserve-static-offset/store-zero.ll +++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-zero.ll @@ -28,7 +28,7 @@ entry: ret void } -; CHECK: define dso_local void @bar(ptr nocapture noundef writeonly %[[p:.*]]) +; CHECK: define dso_local void 
@bar(ptr nocapture noundef writeonly initializes((0, 4)) %[[p:.*]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 0, ptr %[[p]], align 4, !tbaa ; CHECK-NEXT: ret void diff --git a/llvm/test/Other/optimize-inrange-gep.ll b/llvm/test/Other/optimize-inrange-gep.ll index e7465fddd80f0c3..66cf7f2c17f98e0 100644 --- a/llvm/test/Other/optimize-inrange-gep.ll +++ b/llvm/test/Other/optimize-inrange-gep.ll @@ -19,7 +19,7 @@ define void @foo(ptr %p) { ; O0-NEXT: ret void ; ; CHECK-LABEL: define void @foo( -; CHECK-SAME: ptr nocapture writeonly [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: store ptr getelementptr inbounds inrange(-24, 0) (i8, ptr @vtable, i64 24), ptr [[P]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/Coroutines/coro-async.ll b/llvm/test/Transforms/Coroutines/coro-async.ll index 3740c3d1d83871c..f02d0a242dc9927 100644 --- a/llvm/test/Transforms/Coroutines/coro-async.ll +++ b/llvm/test/Transforms/Coroutines/coro-async.ll @@ -116,7 +116,7 @@ define void @my_async_function_pa(ptr %ctxt, ptr %task, ptr %actor) { ; CHECK: @my_async_function_pa_fp = constant <{ i32, i32 }> <{ {{.*}}, i32 176 } ; CHECK: @my_async_function2_fp = constant <{ i32, i32 }> <{ {{.*}}, i32 176 } -; CHECK-LABEL: define swiftcc void @my_async_function(ptr swiftasync %async.ctxt, ptr %task, ptr %actor) +; CHECK-LABEL: define swiftcc void @my_async_function(ptr swiftasync initializes((152, 160)) %async.ctxt, ptr %task, ptr %actor) ; CHECK-O0-LABEL: define swiftcc void @my_async_function(ptr swiftasync %async.ctxt, ptr %task, ptr %actor) ; CHECK-SAME: !dbg ![[SP1:[0-9]+]] { ; CHECK: coro.return: @@ -249,7 +249,7 @@ define swiftcc void @top_level_caller(ptr %ctxt, ptr %task, ptr %actor) { ret void } -; CHECK-LABEL: define swiftcc void @top_level_caller(ptr %ctxt, ptr %task, ptr %actor) +; CHECK-LABEL: define swiftcc void @top_level_caller(ptr 
initializes((152, 160)) %ctxt, ptr %task, ptr %actor) ; CHECK: store ptr @my_async_functionTQ0_ ; CHECK: store ptr %ctxt ; CHECK: tail call swiftcc void @asyncSuspend @@ -410,7 +410,7 @@ entry: unreachable } -; CHECK-LABEL: define swiftcc void @polymorphic_suspend_return(ptr swiftasync %async.ctxt, ptr %task, ptr %actor) +; CHECK-LABEL: define swiftcc void @polymorphic_suspend_return(ptr swiftasync initializes((152, 160)) %async.ctxt, ptr %task, ptr %actor) ; CHECK-LABEL: define internal swiftcc void @polymorphic_suspend_return.resume.0(ptr {{.*}}swiftasync{{.*}} %0, ptr {{.*}}swiftself{{.*}} %1, ptr {{.*}}%2, ptr {{.*}}%3) ; CHECK: } diff --git a/llvm/test/Transforms/FunctionAttrs/argmemonly.ll b/llvm/test/Transforms/FunctionAttrs/argmemonly.ll index 10760e3b8b8b813..5bbe6fa7c27c2ee 100644 --- a/llvm/test/Transforms/FunctionAttrs/argmemonly.ll +++ b/llvm/test/Transforms/FunctionAttrs/argmemonly.ll @@ -101,7 +101,7 @@ entry: define void @test_only_write_arg(ptr %ptr) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; FNATTRS-LABEL: define void @test_only_write_arg -; FNATTRS-SAME: (ptr nocapture writeonly [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 4)) [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: store i32 0, ptr [[PTR]], align 4 ; FNATTRS-NEXT: ret void @@ -156,7 +156,7 @@ declare i32 @fn_readnone() readnone define void @test_call_readnone(ptr %ptr) { ; FNATTRS: Function Attrs: memory(argmem: write) ; FNATTRS-LABEL: define void @test_call_readnone -; FNATTRS-SAME: (ptr nocapture writeonly [[PTR:%.*]]) #[[ATTR7:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 4)) [[PTR:%.*]]) #[[ATTR7:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: [[C:%.*]] = call i32 @fn_readnone() ; FNATTRS-NEXT: store i32 [[C]], ptr [[PTR]], align 4 @@ -221,7 +221,7 @@ entry: define void @test_memcpy_argonly(ptr %dst, ptr %src) { ; FNATTRS: 
Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; FNATTRS-LABEL: define void @test_memcpy_argonly -; FNATTRS-SAME: (ptr nocapture writeonly [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]]) #[[ATTR9:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 32)) [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]]) #[[ATTR9:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 false) ; FNATTRS-NEXT: ret void @@ -245,7 +245,7 @@ declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) define void @test_memcpy_src_global(ptr %dst) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, inaccessiblemem: none) ; FNATTRS-LABEL: define void @test_memcpy_src_global -; FNATTRS-SAME: (ptr nocapture writeonly [[DST:%.*]]) #[[ATTR11:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 32)) [[DST:%.*]]) #[[ATTR11:[0-9]+]] { ; FNATTRS-NEXT: entry: ; FNATTRS-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr @arr, i64 32, i1 false) ; FNATTRS-NEXT: ret void @@ -370,7 +370,7 @@ define void @test_inaccessibleorargmemonly_readonly(ptr %arg) { define void @test_inaccessibleorargmemonly_readwrite(ptr %arg) { ; FNATTRS: Function Attrs: memory(argmem: write, inaccessiblemem: read) ; FNATTRS-LABEL: define void @test_inaccessibleorargmemonly_readwrite -; FNATTRS-SAME: (ptr nocapture writeonly [[ARG:%.*]]) #[[ATTR15:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 4)) [[ARG:%.*]]) #[[ATTR15:[0-9]+]] { ; FNATTRS-NEXT: store i32 0, ptr [[ARG]], align 4 ; FNATTRS-NEXT: call void @fn_inaccessiblememonly() #[[ATTR19]] ; FNATTRS-NEXT: ret void diff --git a/llvm/test/Transforms/FunctionAttrs/initializes.ll b/llvm/test/Transforms/FunctionAttrs/initializes.ll new file mode 100644 index 000000000000000..2aa8385fe4ca7b6 --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/initializes.ll @@ -0,0 
+1,572 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 4 +; RUN: opt -passes=function-attrs -S < %s | FileCheck %s + +define void @basic(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @basic( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + store i64 123, ptr %p + ret void +} + +define void @stores_on_both_paths(ptr %p, i1 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @stores_on_both_paths( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]], i1 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i64 321, ptr [[P]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + br i1 %i, label %bb1, label %bb2 +bb1: + store i64 123, ptr %p + br label %end +bb2: + store i64 321, ptr %p + br label %end +end: + ret void +} + +define void @store_pointer_to_pointer(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @store_pointer_to_pointer( +; CHECK-SAME: ptr [[P:%.*]], ptr nocapture writeonly initializes((0, 8)) [[P2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store ptr [[P]], ptr [[P2]], align 8 +; CHECK-NEXT: ret void +; + store ptr %p, ptr %p2 + ret void +} + +; TODO: this is still initializes +define void @store_pointer_to_itself(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define 
void @store_pointer_to_itself( +; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store ptr [[P]], ptr [[P]], align 8 +; CHECK-NEXT: ret void +; + store ptr %p, ptr %p + ret void +} + +define void @load_before_store(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @load_before_store( +; CHECK-SAME: ptr nocapture [[P:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: store i32 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + %a = load i32, ptr %p + store i32 123, ptr %p + ret void +} + +define void @partial_load_before_store(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @partial_load_before_store( +; CHECK-SAME: ptr nocapture initializes((4, 8)) [[P:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + %a = load i32, ptr %p + store i64 123, ptr %p + ret void +} + +declare void @use(ptr) + +define void @call_clobber(ptr %p) { +; CHECK-LABEL: define void @call_clobber( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: call void @use(ptr [[P]]) +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + call void @use(ptr %p) + store i64 123, ptr %p + ret void +} + +define void @call_clobber_after_store(ptr %p) { +; CHECK-LABEL: define void @call_clobber_after_store( +; CHECK-SAME: ptr initializes((0, 8)) [[P:%.*]]) { +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: call void @use(ptr [[P]]) +; CHECK-NEXT: ret void +; + store i64 123, ptr %p + call void @use(ptr %p) + ret void +} + +define void @store_offset(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @store_offset( +; CHECK-SAME: ptr 
nocapture writeonly initializes((8, 12)) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 8 +; CHECK-NEXT: store i32 123, ptr [[G]], align 4 +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 8 + store i32 123, ptr %g + ret void +} + +define void @store_volatile(ptr %p) { +; CHECK: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) +; CHECK-LABEL: define void @store_volatile( +; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 8 +; CHECK-NEXT: store volatile i32 123, ptr [[G]], align 4 +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 8 + store volatile i32 123, ptr %g + ret void +} + +define void @merge_store_ranges(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @merge_store_ranges( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 4 +; CHECK-NEXT: store i32 123, ptr [[G]], align 4 +; CHECK-NEXT: store i32 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 4 + store i32 123, ptr %g + store i32 123, ptr %p + ret void +} + +define void @partially_overlapping_stores_branches(ptr %p, i1 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @partially_overlapping_stores_branches( +; CHECK-SAME: ptr nocapture initializes((4, 8)) [[P:%.*]], i1 [[I:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]] +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 4 +; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i64 123, ptr [[G]], align 4 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store 
i64 321, ptr [[P]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %a = load i32, ptr %p + %g = getelementptr i8, ptr %p, i64 4 + br i1 %i, label %bb1, label %bb2 +bb1: + store i64 123, ptr %g + br label %end +bb2: + store i64 321, ptr %p + br label %end +end: + ret void +} + +define void @non_overlapping_stores_branches(ptr %p, i1 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @non_overlapping_stores_branches( +; CHECK-SAME: ptr nocapture writeonly [[P:%.*]], i1 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 8 +; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i64 123, ptr [[G]], align 4 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i64 321, ptr [[P]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %g = getelementptr i8, ptr %p, i64 8 + br i1 %i, label %bb1, label %bb2 +bb1: + store i64 123, ptr %g + br label %end +bb2: + store i64 321, ptr %p + br label %end +end: + ret void +} + +define void @dominating_store(ptr %p, i1 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @dominating_store( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]], i1 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: store i64 321, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; +entry: + br i1 %i, label %bb1, label %bb2 +bb1: + br label %end +bb2: + br label %end +end: + store i64 321, ptr %p + ret void +} + +define void @call_clobber_on_one_branch(ptr %p, i1 %i) { +; CHECK-LABEL: define 
void @call_clobber_on_one_branch( +; CHECK-SAME: ptr [[P:%.*]], i1 [[I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @use(ptr [[P]]) +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: store i64 321, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; +entry: + br i1 %i, label %bb1, label %bb2 +bb1: + br label %end +bb2: + call void @use(ptr %p) + br label %end +end: + store i64 321, ptr %p + ret void +} + +define void @merge_existing_initializes(ptr initializes((33, 36)) %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @merge_existing_initializes( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8), (33, 36)) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + store i64 123, ptr %p + ret void +} + +define void @negative_offset(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @negative_offset( +; CHECK-SAME: ptr nocapture writeonly initializes((-5, 3)) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 -5 +; CHECK-NEXT: store i64 123, ptr [[G]], align 4 +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 -5 + store i64 123, ptr %g + ret void +} + +define void @non_const_gep(ptr %p, i64 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @non_const_gep( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 8)) [[P:%.*]], i64 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]] +; CHECK-NEXT: store i64 123, ptr [[G]], align 4 +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + %g = getelementptr 
i8, ptr %p, i64 %i + store i64 123, ptr %g + store i64 123, ptr %p + ret void +} + +define void @call_clobber_in_entry_block(ptr %p, i1 %i) { +; CHECK-LABEL: define void @call_clobber_in_entry_block( +; CHECK-SAME: ptr [[P:%.*]], i1 [[I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @use(ptr [[P]]) +; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i64 123, ptr [[P]], align 4 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i64 321, ptr [[P]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + call void @use(ptr %p) + br i1 %i, label %bb1, label %bb2 +bb1: + store i64 123, ptr %p + br label %end +bb2: + store i64 321, ptr %p + br label %end +end: + ret void +} + +declare void @g1(ptr initializes((0, 4)) %p) +declare void @g2(ptr initializes((8, 12)) %p) +declare void @g3(ptr initializes((0, 4)) writeonly nocapture %p) + +define void @call_initializes(ptr %p) { +; CHECK-LABEL: define void @call_initializes( +; CHECK-SAME: ptr initializes((0, 4)) [[P:%.*]]) { +; CHECK-NEXT: call void @g1(ptr [[P]]) +; CHECK-NEXT: ret void +; + call void @g1(ptr %p) + ret void +} + +define void @call_initializes_clobber(ptr %p) { +; CHECK-LABEL: define void @call_initializes_clobber( +; CHECK-SAME: ptr initializes((0, 4)) [[P:%.*]]) { +; CHECK-NEXT: call void @g1(ptr [[P]]) +; CHECK-NEXT: call void @g2(ptr [[P]]) +; CHECK-NEXT: ret void +; + call void @g1(ptr %p) + call void @g2(ptr %p) + ret void +} + +define void @call_initializes_no_clobber_writeonly_nocapture(ptr %p) { +; CHECK-LABEL: define void @call_initializes_no_clobber_writeonly_nocapture( +; CHECK-SAME: ptr initializes((0, 4), (8, 12)) [[P:%.*]]) { +; CHECK-NEXT: call void @g3(ptr [[P]]) +; CHECK-NEXT: call void @g2(ptr [[P]]) +; CHECK-NEXT: ret void +; + call void @g3(ptr %p) + call void @g2(ptr %p) + ret void +} + +define void @call_initializes_escape_bundle(ptr %p) { +; CHECK-LABEL: define void 
@call_initializes_escape_bundle( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: call void @g1(ptr [[P]]) [ "unknown"(ptr [[P]]) ] +; CHECK-NEXT: ret void +; + call void @g1(ptr %p) ["unknown"(ptr %p)] + ret void +} + +define void @access_bundle() { + %sink = alloca i64, align 8 + store i64 123, ptr %sink + ret void +} + +define void @call_operand_bundle(ptr %p) { +; CHECK-LABEL: define void @call_operand_bundle( +; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: call void @access_bundle() [ "unknown"(ptr [[P]]) ] +; CHECK-NEXT: ret void +; + call void @access_bundle() ["unknown"(ptr %p)] + ret void +} + +declare void @llvm.memset(ptr, i8, i64 ,i1) + +define void @memset(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @memset( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 9)) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 9, i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memset(ptr %p, i8 2, i64 9, i1 false) + ret void +} + +define void @memset_offset(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @memset_offset( +; CHECK-SAME: ptr nocapture writeonly initializes((3, 12)) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[G]], i8 2, i64 9, i1 false) +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 3 + call void @llvm.memset(ptr %g, i8 2, i64 9, i1 false) + ret void +} + +define void @memset_volatile(ptr %p) { +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @memset_volatile( +; CHECK-SAME: ptr writeonly [[P:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 9, i1 true) +; CHECK-NEXT: ret 
void +; + call void @llvm.memset(ptr %p, i8 2, i64 9, i1 true) + ret void +} + +define void @memset_non_constant(ptr %p, i64 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; CHECK-LABEL: define void @memset_non_constant( +; CHECK-SAME: ptr nocapture writeonly [[P:%.*]], i64 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 [[I]], i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memset(ptr %p, i8 2, i64 %i, i1 false) + ret void +} + +declare void @llvm.memcpy(ptr, ptr, i64 ,i1) + +define void @memcpy(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memcpy( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 9)) [[P:%.*]], ptr nocapture readonly [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P]], ptr [[P2]], i64 9, i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memcpy(ptr %p, ptr %p2, i64 9, i1 false) + ret void +} + +define void @memcpy_volatile(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memcpy_volatile( +; CHECK-SAME: ptr writeonly [[P:%.*]], ptr readonly [[P2:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P]], ptr [[P2]], i64 9, i1 true) +; CHECK-NEXT: ret void +; + call void @llvm.memcpy(ptr %p, ptr %p2, i64 9, i1 true) + ret void +} + +define void @memcpy_offset(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memcpy_offset( +; CHECK-SAME: ptr nocapture writeonly initializes((3, 12)) [[P:%.*]], ptr nocapture readonly [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[G]], 
ptr [[P2]], i64 9, i1 false) +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 3 + call void @llvm.memcpy(ptr %g, ptr %p2, i64 9, i1 false) + ret void +} + +define void @memcpy_src(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memcpy_src( +; CHECK-SAME: ptr nocapture initializes((96, 128)) [[P:%.*]], ptr nocapture initializes((0, 96)) [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P2]], ptr [[P]], i64 96, i1 false) +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 64 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[G]], ptr [[P2]], i64 64, i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memcpy(ptr %p2, ptr %p, i64 96, i1 false) + %g = getelementptr i8, ptr %p, i64 64 + call void @llvm.memcpy(ptr %g, ptr %p2, i64 64, i1 false) + ret void +} + +define void @memcpy_non_constant(ptr %p, ptr %p2, i64 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memcpy_non_constant( +; CHECK-SAME: ptr nocapture writeonly [[P:%.*]], ptr nocapture readonly [[P2:%.*]], i64 [[I:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P]], ptr [[P2]], i64 [[I]], i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memcpy(ptr %p, ptr %p2, i64 %i, i1 false) + ret void +} + +declare void @llvm.memmove(ptr, ptr, i64 ,i1) + +define void @memmove(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memmove( +; CHECK-SAME: ptr nocapture writeonly initializes((0, 9)) [[P:%.*]], ptr nocapture readonly [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[P]], ptr [[P2]], i64 9, i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memmove(ptr %p, ptr %p2, i64 9, i1 false) + ret 
void +} + +define void @memmove_volatile(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memmove_volatile( +; CHECK-SAME: ptr writeonly [[P:%.*]], ptr readonly [[P2:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[P]], ptr [[P2]], i64 9, i1 true) +; CHECK-NEXT: ret void +; + call void @llvm.memmove(ptr %p, ptr %p2, i64 9, i1 true) + ret void +} + +define void @memmove_offset(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memmove_offset( +; CHECK-SAME: ptr nocapture writeonly initializes((3, 12)) [[P:%.*]], ptr nocapture readonly [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[G]], ptr [[P2]], i64 9, i1 false) +; CHECK-NEXT: ret void +; + %g = getelementptr i8, ptr %p, i64 3 + call void @llvm.memmove(ptr %g, ptr %p2, i64 9, i1 false) + ret void +} + +define void @memmove_src(ptr %p, ptr %p2) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memmove_src( +; CHECK-SAME: ptr nocapture initializes((96, 128)) [[P:%.*]], ptr nocapture initializes((0, 96)) [[P2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[P2]], ptr [[P]], i64 96, i1 false) +; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[P]], i64 64 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[G]], ptr [[P2]], i64 64, i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memmove(ptr %p2, ptr %p, i64 96, i1 false) + %g = getelementptr i8, ptr %p, i64 64 + call void @llvm.memmove(ptr %g, ptr %p2, i64 64, i1 false) + ret void +} + +define void @memmove_non_constant(ptr %p, ptr %p2, i64 %i) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind 
willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @memmove_non_constant( +; CHECK-SAME: ptr nocapture writeonly [[P:%.*]], ptr nocapture readonly [[P2:%.*]], i64 [[I:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[P]], ptr [[P2]], i64 [[I]], i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memmove(ptr %p, ptr %p2, i64 %i, i1 false) + ret void +} diff --git a/llvm/test/Transforms/FunctionAttrs/readattrs.ll b/llvm/test/Transforms/FunctionAttrs/readattrs.ll index 39513976f90d76e..004c0485d764aea 100644 --- a/llvm/test/Transforms/FunctionAttrs/readattrs.ll +++ b/llvm/test/Transforms/FunctionAttrs/readattrs.ll @@ -107,7 +107,7 @@ define void @test4_2(ptr %p) { define void @test5(ptr %p, ptr %q) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; FNATTRS-LABEL: define {{[^@]+}}@test5 -; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR4:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 8)) [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR4:[0-9]+]] { ; FNATTRS-NEXT: store ptr [[Q]], ptr [[P]], align 8 ; FNATTRS-NEXT: ret void ; @@ -132,7 +132,7 @@ declare void @test6_1() ; This is not a missed optz'n. 
define void @test6_2(ptr %p, ptr %q) { ; FNATTRS-LABEL: define {{[^@]+}}@test6_2 -; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]], ptr [[Q:%.*]]) { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 8)) [[P:%.*]], ptr [[Q:%.*]]) { ; FNATTRS-NEXT: store ptr [[Q]], ptr [[P]], align 8 ; FNATTRS-NEXT: call void @test6_1() ; FNATTRS-NEXT: ret void diff --git a/llvm/test/Transforms/FunctionAttrs/writeonly.ll b/llvm/test/Transforms/FunctionAttrs/writeonly.ll index de2d5e223894763..ba546aff6e6211e 100644 --- a/llvm/test/Transforms/FunctionAttrs/writeonly.ll +++ b/llvm/test/Transforms/FunctionAttrs/writeonly.ll @@ -66,7 +66,7 @@ nouses-argworn-funwo_entry: define void @test_store(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; FNATTRS-LABEL: define {{[^@]+}}@test_store -; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR3:[0-9]+]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((0, 1)) [[P:%.*]]) #[[ATTR3:[0-9]+]] { ; FNATTRS-NEXT: store i8 0, ptr [[P]], align 1 ; FNATTRS-NEXT: ret void ; @@ -107,7 +107,7 @@ define i8 @test_store_capture(ptr %p) { define void @test_addressing(ptr %p) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; FNATTRS-LABEL: define {{[^@]+}}@test_addressing -; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR3]] { +; FNATTRS-SAME: (ptr nocapture writeonly initializes((8, 12)) [[P:%.*]]) #[[ATTR3]] { ; FNATTRS-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 8 ; FNATTRS-NEXT: store i32 0, ptr [[GEP]], align 4 ; FNATTRS-NEXT: ret void diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll index 1c9e7a771ca19c7..ec0c2b40640f493 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll @@ -8,7 +8,7 @@ target triple = 
"x86_64-unknown-linux-gnu" define void @foo(ptr %a, <32 x i8> %_0) #0 { ; CHECK-LABEL: define void @foo( -; CHECK-SAME: ptr nocapture writeonly [[A:%.*]], <32 x i8> [[_0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr nocapture writeonly initializes((0, 32)) [[A:%.*]], <32 x i8> [[_0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: start: ; CHECK-NEXT: store <32 x i8> [[_0]], ptr [[A]], align 1 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll b/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll index bd910b82496fd1d..5e6eab9d8073682 100644 --- a/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll +++ b/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll @@ -10,7 +10,7 @@ define void @memcpy_forward_back_with_offset(ptr %arg) { ; CUSTOM-NEXT: ret void ; ; O2-LABEL: define void @memcpy_forward_back_with_offset( -; O2-SAME: ptr nocapture writeonly [[ARG:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; O2-SAME: ptr nocapture writeonly initializes((0, 1)) [[ARG:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; O2-NEXT: store i8 1, ptr [[ARG]], align 1 ; O2-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/pr95152.ll b/llvm/test/Transforms/PhaseOrdering/pr95152.ll index fff94673a1a5190..016460fed7c3505 100644 --- a/llvm/test/Transforms/PhaseOrdering/pr95152.ll +++ b/llvm/test/Transforms/PhaseOrdering/pr95152.ll @@ -21,7 +21,7 @@ define void @j(ptr %p) optnone noinline { define void @h(ptr %p) { ; CHECK-LABEL: define void @h( -; CHECK-SAME: ptr [[P:%.*]]) local_unnamed_addr { +; CHECK-SAME: ptr initializes((0, 8)) [[P:%.*]]) local_unnamed_addr { ; CHECK-NEXT: store i64 3, ptr [[P]], align 4 ; CHECK-NEXT: tail call void @j(ptr nonnull [[P]]) ; CHECK-NEXT: ret void @@ -33,7 +33,7 @@ define void @h(ptr %p) { define void @g(ptr dead_on_unwind noalias writable dereferenceable(8) align 8 %p) minsize { ; CHECK-LABEL: define void @g( -; CHECK-SAME: ptr dead_on_unwind noalias nocapture writable 
writeonly align 8 dereferenceable(8) [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-SAME: ptr dead_on_unwind noalias nocapture writable writeonly align 8 dereferenceable(8) initializes((0, 8)) [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: tail call void @h(ptr nonnull [[P]]) ; CHECK-NEXT: ret void ; @@ -45,7 +45,7 @@ define void @g(ptr dead_on_unwind noalias writable dereferenceable(8) align 8 %p define void @f(ptr dead_on_unwind noalias %p) { ; CHECK-LABEL: define void @f( -; CHECK-SAME: ptr dead_on_unwind noalias [[P:%.*]]) local_unnamed_addr { +; CHECK-SAME: ptr dead_on_unwind noalias initializes((0, 8)) [[P:%.*]]) local_unnamed_addr { ; CHECK-NEXT: store i64 3, ptr [[P]], align 4 ; CHECK-NEXT: tail call void @j(ptr nonnull align 8 dereferenceable(8) [[P]]) ; CHECK-NEXT: store i64 43, ptr [[P]], align 4 From a432f11a52dd5ec21a3438bdaa8f623e32a3234c Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 21 Nov 2024 11:57:15 +1100 Subject: [PATCH 002/351] [JITLink][arm64] Support arm64e JIT'd code (initially enabled for MachO only). Adds two new JITLink passes to create and populate a pointer-signing function that can be called via an allocation-action attached to the LinkGraph: * createEmptyPointerSigningFunction creates a pointer signing function in a custom section, reserving sufficient space for the signing code. It should be run as a post-prune pass (to ensure that memory is reserved prior to allocation). * lowerPointer64AuthEdgesToSigningFunction pass populates the signing function by walking the graph, decoding the ptrauth info (encoded in the edge addend) and writing an instruction sequence to sign all ptrauth fixup locations. 
rdar://61956998 --- .../llvm/ExecutionEngine/JITLink/aarch64.h | 53 ++++ .../ExecutionEngine/JITLink/MachO_arm64.cpp | 36 ++- llvm/lib/ExecutionEngine/JITLink/aarch64.cpp | 276 ++++++++++++++++++ .../JITLink/AArch64/MachO_ptrauth-globals.s | 158 ++++++++++ 4 files changed, 520 insertions(+), 3 deletions(-) create mode 100644 llvm/test/ExecutionEngine/JITLink/AArch64/MachO_ptrauth-globals.s diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h index e8c3e3414dce0c8..db440c378d24ff4 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h @@ -31,6 +31,36 @@ enum EdgeKind_aarch64 : Edge::Kind { /// Pointer64 = Edge::FirstRelocation, + /// An arm64e authenticated pointer relocation. The addend contains a 64-bit + /// struct containing the authentication parameters: + /// + /// Addend encoding: + /// int32_t addend; + /// uint16_t diversityData; + /// uint16_t hasAddressDiversity : 1; + /// uint16_t key : 2; + /// uint16_t zeroes : 12; + /// uint16_t authenticated : 1; + /// + /// Note: This means that the addend cannot be interpreted as a plain offset + /// prior to lowering. + /// + /// Authenticated pointer edges cannot be fixed up directly by JITLink as the + /// signing keys are held in the executing process. They can be removed from + /// the graph by a combination of the createEmptyPointerSigningFunction pass + /// (post-prune) and the lowerPointer64AuthEdgesToSigningFunction pass + /// (pre-fixup). Together these passes construct a signing function that will + /// be run in the executing process to write the signed pointers to the fixup + /// locations. + /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - Failure to handle edges of this kind prior to the fixup phase will + /// result in an unsupported error during the fixup phase. + Pointer64Authenticated, + /// A plain 32-bit pointer value relocation. 
/// /// Fixup expression: @@ -832,6 +862,29 @@ class PLTTableManager : public TableManager { Section *StubsSection = nullptr; }; +/// Returns the name of the pointer signing function section. +const char *getPointerSigningFunctionSectionName(); + +/// Creates a pointer signing function section, block, and symbol to reserve +/// space for a signing function for this LinkGraph. Clients should insert this +/// pass in the post-prune phase, and add the paired +/// lowerPointer64AuthEdgesToSigningFunction pass to the pre-fixup phase. +/// +/// No new Pointer64Auth edges can be inserted into the graph between when this +/// pass is run and when the pass below runs (since there will not be sufficient +/// space reserved in the signing function to write the signing code for them). +Error createEmptyPointerSigningFunction(LinkGraph &G); + +/// Given a LinkGraph containing Pointer64Authenticated edges, transform those +/// edges to Pointer64 and add signing code to the pointer signing function +/// (which must already have been created by the +/// createEmptyPointerSigningFunction pass above). +/// +/// This function will add a $__ptrauth_sign section with finalization-lifetime +/// containing an anonymous function that will sign all pointers in the graph. +/// An allocation action will be added to run this function during finalization. 
+Error lowerPointer64AuthEdgesToSigningFunction(LinkGraph &G); + } // namespace aarch64 } // namespace jitlink } // namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp index 125c6373f82d9f5..5607963e3774327 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -28,8 +28,8 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { public: MachOLinkGraphBuilder_arm64(const object::MachOObjectFile &Obj, SubtargetFeatures Features) - : MachOLinkGraphBuilder(Obj, Triple("arm64-apple-darwin"), - std::move(Features), aarch64::getEdgeKindName), + : MachOLinkGraphBuilder(Obj, getObjectTriple(Obj), std::move(Features), + aarch64::getEdgeKindName), NumSymbols(Obj.getSymtabLoadCommand().nsyms) {} private: @@ -38,6 +38,7 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { MachOPointer32, MachOPointer64, MachOPointer64Anon, + MachOPointer64Authenticated, MachOPage21, MachOPageOffset12, MachOGOTPage21, @@ -53,6 +54,18 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { MachONegDelta64, }; + static Triple getObjectTriple(const object::MachOObjectFile &Obj) { + // Get the CPU sub-type from the header. + // jitLink_MachO should already have validated that the buffer is big enough + // to cover a mach_header64 so this is safe. 
+ uint32_t CPUSubType = + *(const support::ulittle32_t *)(Obj.getData().data() + 8); + CPUSubType &= ~MachO::CPU_SUBTYPE_MASK; + if (CPUSubType == MachO::CPU_SUBTYPE_ARM64E) + return Triple("arm64e-apple-darwin"); + return Triple("arm64-apple-darwin"); + } + static Expected getRelocationKind(const MachO::relocation_info &RI) { switch (RI.r_type) { @@ -103,6 +116,10 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { if (!RI.r_pcrel && !RI.r_extern && RI.r_length == 2) return MachOPairedAddend; break; + case MachO::ARM64_RELOC_AUTHENTICATED_POINTER: + if (!RI.r_pcrel && RI.r_extern && RI.r_length == 3) + return MachOPointer64Authenticated; + break; case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21: if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) return MachOTLVPage21; @@ -366,12 +383,15 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { Kind = aarch64::Pointer32; break; case MachOPointer64: + case MachOPointer64Authenticated: if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) TargetSymbol = TargetSymbolOrErr->GraphSymbol; else return TargetSymbolOrErr.takeError(); Addend = *(const ulittle64_t *)FixupContent; - Kind = aarch64::Pointer64; + Kind = *MachORelocKind == MachOPointer64 + ? aarch64::Pointer64 + : aarch64::Pointer64Authenticated; break; case MachOPointer64Anon: { orc::ExecutorAddr TargetAddress(*(const ulittle64_t *)FixupContent); @@ -493,6 +513,8 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { return "MachOPointer64"; case MachOPointer64Anon: return "MachOPointer64Anon"; + case MachOPointer64Authenticated: + return "MachOPointer64Authenticated"; case MachOPage21: return "MachOPage21"; case MachOPageOffset12: @@ -601,6 +623,14 @@ void link_MachO_arm64(std::unique_ptr G, // Add an in-place GOT/Stubs pass. Config.PostPrunePasses.push_back(buildTables_MachO_arm64); + + // If this is an arm64e graph then add pointer signing passes. 
+ if (G->getTargetTriple().isArm64e()) { + Config.PostPrunePasses.push_back( + aarch64::createEmptyPointerSigningFunction); + Config.PreFixupPasses.push_back( + aarch64::lowerPointer64AuthEdgesToSigningFunction); + } } if (auto Err = Ctx->modifyPassConfig(*G, Config)) diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp index 4d3c19574a23cc3..a79dbd5e4494f40 100644 --- a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp @@ -12,6 +12,8 @@ #include "llvm/ExecutionEngine/JITLink/aarch64.h" +#include "llvm/Support/BinaryStreamWriter.h" + #define DEBUG_TYPE "jitlink" namespace llvm { @@ -80,6 +82,280 @@ const char *getEdgeKindName(Edge::Kind R) { } } +// Write a 64-bit GPR -> GPR move. +template +static Error writeMovRegRegSeq(AppendFtor &Append, uint64_t DstReg, + uint64_t SrcReg) { + assert(DstReg < 32 && "Dst reg out of range"); + assert(SrcReg < 32 && "Src reg out of range"); + + if (DstReg == SrcReg) + return Error::success(); + + constexpr uint32_t MOVGPR64Template = 0xaa0003e0; + constexpr uint32_t DstRegIndex = 0; + constexpr uint32_t SrcRegIndex = 16; + uint32_t Instr = MOVGPR64Template; + Instr |= DstReg << DstRegIndex; + Instr |= SrcReg << SrcRegIndex; + return Append(Instr); +} + +// Generate a sequence of imm writes to assign the given value. +template +static Error writeMovRegImm64Seq(AppendFtor &Append, uint64_t Reg, + uint64_t Imm) { + assert(Reg < 32 && "Invalid register number"); + + constexpr uint32_t MovRegImm64Template = 0xd2800000; + constexpr unsigned PreserveBitIndex = 29; + constexpr unsigned ShiftBitsIndex = 21; + constexpr unsigned ImmBitsIndex = 5; + + bool PreserveRegValue = false; + for (unsigned I = 0; I != 4; ++I) { + uint32_t ImmBits = Imm & 0xffff; + Imm >>= 16; + + // Skip any all-zero immediates after the first one. 
+ if (PreserveRegValue && !ImmBits) + continue; + + uint32_t Instr = MovRegImm64Template; + Instr |= PreserveRegValue << PreserveBitIndex; + Instr |= (I << ShiftBitsIndex); + Instr |= ImmBits << ImmBitsIndex; + Instr |= Reg; + if (auto Err = Append(Instr)) + return Err; + PreserveRegValue = true; + } + + return Error::success(); +} + +template +static Error +writePACSignSeq(AppendFtor &Append, unsigned DstReg, orc::ExecutorAddr RawAddr, + unsigned RawAddrReg, unsigned DiscriminatorReg, unsigned Key, + uint64_t EncodedDiscriminator, bool AddressDiversify) { + assert(DstReg < 32 && "DstReg out of range"); + assert(RawAddrReg < 32 && "AddrReg out of range"); + assert(DiscriminatorReg < 32 && "DiscriminatorReg out of range"); + assert(EncodedDiscriminator < 0x10000 && "EncodedDiscriminator out of range"); + + if (AddressDiversify) { + // Move the address into the discriminator register. + if (auto Err = writeMovRegRegSeq(Append, DiscriminatorReg, RawAddrReg)) + return Err; + // Blend encoded discriminator if there is one. + if (EncodedDiscriminator) { + constexpr uint32_t MOVKTemplate = 0xf2e00000; + constexpr unsigned ImmIndex = 5; + uint32_t BlendInstr = MOVKTemplate; + BlendInstr |= EncodedDiscriminator << ImmIndex; + BlendInstr |= DiscriminatorReg; + if (auto Err = Append(BlendInstr)) + return Err; + } + } else if (EncodedDiscriminator) { + // Move the encoded discriminator into the discriminator register. 
+ if (auto Err = + writeMovRegImm64Seq(Append, DiscriminatorReg, EncodedDiscriminator)) + return Err; + } else + DiscriminatorReg = 31; // WZR + + constexpr uint32_t PACTemplate = 0xdac10000; + constexpr unsigned ZBitIndex = 13; + constexpr unsigned KeyIndex = 10; + constexpr unsigned DiscriminatorRegIndex = 5; + + uint32_t Instr = PACTemplate; + Instr |= (DiscriminatorReg == 31) << ZBitIndex; + Instr |= Key << KeyIndex; + Instr |= DiscriminatorReg << DiscriminatorRegIndex; + Instr |= DstReg; + + return Append(Instr); +} + +template +static Error writeStoreRegSeq(AppendFtor &Append, unsigned DstLocReg, + unsigned SrcReg) { + assert(DstLocReg < 32 && "DstLocReg out of range"); + assert(SrcReg < 32 && "SrcReg out of range"); + + constexpr uint32_t STRTemplate = 0xf9000000; + constexpr unsigned DstLocRegIndex = 5; + constexpr unsigned SrcRegIndex = 0; + + uint32_t Instr = STRTemplate; + Instr |= DstLocReg << DstLocRegIndex; + Instr |= SrcReg << SrcRegIndex; + + return Append(Instr); +} + +const char *getPointerSigningFunctionSectionName() { return "$__ptrauth_sign"; } + +/// Creates a pointer signing function section, block, and symbol to reserve +/// space for a signing function for this LinkGraph. Clients should insert this +/// pass in the post-prune phase, and add the paired +/// lowerPointer64AuthEdgesToSigningFunction pass to the pre-fixup phase. +Error createEmptyPointerSigningFunction(LinkGraph &G) { + LLVM_DEBUG({ + dbgs() << "Creating empty pointer signing function for " << G.getName() + << "\n"; + }); + + // FIXME: We could put a tighter bound on this if we inspected the ptrauth + // info encoded in the addend -- the only actually unknown quantity is the + // fixup location, and we can probably put constraints even on that. 
+ size_t NumPtrAuthFixupLocations = 0; + for (auto *B : G.blocks()) + for (auto &E : B->edges()) + NumPtrAuthFixupLocations += + E.getKind() == aarch64::Pointer64Authenticated; + + constexpr size_t MaxPtrSignSeqLength = + 4 + // To materialize the value to sign. + 4 + // To materialize the fixup location. + 3 + // To copy, blend discriminator, and sign + 1; // To store the result. + + // The maximum number of signing instructions required is the maximum per + // location, times the number of locations, plus three instructions to + // materialize the return value and return. + size_t NumSigningInstrs = NumPtrAuthFixupLocations * MaxPtrSignSeqLength + 3; + + // Create signing function section. + auto &SigningSection = + G.createSection(getPointerSigningFunctionSectionName(), + orc::MemProt::Read | orc::MemProt::Exec); + SigningSection.setMemLifetime(orc::MemLifetime::Finalize); + + size_t SigningFunctionSize = NumSigningInstrs * 4; + auto &SigningFunctionBlock = G.createMutableContentBlock( + SigningSection, G.allocateBuffer(SigningFunctionSize), + orc::ExecutorAddr(), 4, 0); + G.addAnonymousSymbol(SigningFunctionBlock, 0, SigningFunctionBlock.getSize(), + true, true); + + LLVM_DEBUG({ + dbgs() << " " << NumPtrAuthFixupLocations << " location(s) to sign, up to " + << NumSigningInstrs << " instructions required (" + << formatv("{0:x}", SigningFunctionBlock.getSize()) << " bytes)\n"; + }); + + return Error::success(); +} + +/// Given a LinkGraph containing Pointer64Auth edges, transform those edges to +/// Pointer64 and add code to sign the pointers in the executor. +/// +/// This function will add a $__ptrauth_sign section with finalization-lifetime +/// containing an anonymous function that will sign all pointers in the graph. +/// An allocation action will be added to run this function during finalization. 
+Error lowerPointer64AuthEdgesToSigningFunction(LinkGraph &G) { + LLVM_DEBUG({ + dbgs() << "Writing pointer signing function for " << G.getName() << "\n"; + }); + + constexpr unsigned Reg1 = 8; // Holds pointer value to sign. + constexpr unsigned Reg2 = 9; // Holds fixup address. + constexpr unsigned Reg3 = 10; // Temporary for discriminator value if needed. + + // Find the signing function. + auto *SigningSection = + G.findSectionByName(getPointerSigningFunctionSectionName()); + assert(SigningSection && "Signing section missing"); + assert(SigningSection->blocks_size() == 1 && + "Unexpected number of blocks in signing section"); + assert(SigningSection->symbols_size() == 1 && + "Unexpected number of symbols in signing section"); + + auto &SigningFunctionSym = **SigningSection->symbols().begin(); + auto &SigningFunctionBlock = SigningFunctionSym.getBlock(); + auto SigningFunctionBuf = SigningFunctionBlock.getAlreadyMutableContent(); + + // Write the instructions to the block content. + BinaryStreamWriter InstrWriter( + {reinterpret_cast(SigningFunctionBuf.data()), + SigningFunctionBuf.size()}, + G.getEndianness()); + + auto AppendInstr = [&](uint32_t Instr) { + return InstrWriter.writeInteger(Instr); + }; + + for (auto *B : G.blocks()) { + for (auto EI = B->edges().begin(); EI != B->edges().end();) { + auto &E = *EI; + if (E.getKind() == aarch64::Pointer64Authenticated) { + uint64_t EncodedInfo = E.getAddend(); + int32_t RealAddend = (uint32_t)(EncodedInfo & 0xffffffff); + uint32_t InitialDiscriminator = (EncodedInfo >> 32) & 0xffff; + bool AddressDiversify = (EncodedInfo >> 48) & 0x1; + uint32_t Key = (EncodedInfo >> 49) & 0x3; + uint32_t HighBits = EncodedInfo >> 51; + auto ValueToSign = E.getTarget().getAddress() + RealAddend; + + if (HighBits != 0x1000) + return make_error( + "Pointer64Auth edge at " + + formatv("{0:x}", B->getFixupAddress(E).getValue()) + + " has invalid encoded addend " + formatv("{0:x}", EncodedInfo)); + +#ifndef NDEBUG + const char *const 
KeyNames[] = {"IA", "IB", "DA", "DB"}; +#endif // NDEBUG + LLVM_DEBUG({ + dbgs() << " " << B->getFixupAddress(E) << " <- " << ValueToSign + << " : key = " << KeyNames[Key] << ", discriminator = " + << formatv("{0:x4}", InitialDiscriminator) + << ", address diversified = " + << (AddressDiversify ? "yes" : "no") << "\n"; + }); + + // Materialize pointer value. + cantFail( + writeMovRegImm64Seq(AppendInstr, Reg1, ValueToSign.getValue())); + + // Materialize fixup pointer. + cantFail(writeMovRegImm64Seq(AppendInstr, Reg2, + B->getFixupAddress(E).getValue())); + + // Write signing instruction(s). + cantFail(writePACSignSeq(AppendInstr, Reg1, ValueToSign, Reg2, Reg3, + Key, InitialDiscriminator, AddressDiversify)); + + // Store signed pointer. + cantFail(writeStoreRegSeq(AppendInstr, Reg2, Reg1)); + + // Remove this edge. + EI = B->removeEdge(EI); + } else + ++EI; + } + } + + // Write epilogue. x0 = 0, x1 = 1 is an SPS serialized Error::success value. + constexpr uint32_t RETInstr = 0xd65f03c0; + cantFail(writeMovRegImm64Seq(AppendInstr, 0, 0)); // mov x0, #0 + cantFail(writeMovRegImm64Seq(AppendInstr, 1, 1)); // mov x1, #1 + cantFail(AppendInstr(RETInstr)); // ret + + // Add an allocation action to call the signing function. 
+ using namespace orc::shared; + G.allocActions().push_back( + {cantFail(WrapperFunctionCall::Create>( + SigningFunctionSym.getAddress())), + {}}); + + return Error::success(); +} + } // namespace aarch64 } // namespace jitlink } // namespace llvm diff --git a/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_ptrauth-globals.s b/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_ptrauth-globals.s new file mode 100644 index 000000000000000..1a4939f3a25c88a --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_ptrauth-globals.s @@ -0,0 +1,158 @@ +# RUN: llvm-mc -triple=arm64e-apple-macosx -filetype=obj -o %t.o %s +# RUN: llvm-jitlink %t.o +# +# REQUIRES: native && system-darwin +# +# Check that arm64e ptrauth relocations are handled correctly. +# +# This test contains eight global pointers with different signing schemes +# (IA vs DA key, with and without address diversity, and with 0 or 0xa5a5 as +# the additional diversity value). If all pointers pass authentication at +# runtime then the test returns zero. +# +# This test requires execution since the signed pointers are written by a +# signing function attached to the graph. +# +# TODO: Write an out-of-process version. This will probably need to be added to +# the ORC runtime. 
+ + .section __TEXT,__text,regular,pure_instructions + .build_version macos, 13, 0 sdk_version 13, 3 + .globl _main + .p2align 2 +_main: + adrp x8, _p1@PAGE + ldr x16, [x8, _p1@PAGEOFF] + autiza x16 + + adrp x9, _p2@PAGE + add x9, x9, _p2@PAGEOFF + ldr x16, [x9] + autia x16, x9 + + adrp x10, _p3@PAGE + ldr x16, [x10, _p3@PAGEOFF] + mov x17, #23130 + autia x16, x17 + + adrp x9, _p4@PAGE + add x9, x9, _p4@PAGEOFF + ldr x16, [x9] + mov x17, x9 + movk x17, #23130, lsl #48 + autia x16, x17 + + adrp x10, _p5@PAGE + ldr x10, [x10, _p5@PAGEOFF] + ldraa x10, [x10] + + adrp x9, _p6@PAGE + add x9, x9, _p6@PAGEOFF + ldr x16, [x9] + autda x16, x9 + + adrp x10, _p7@PAGE + ldr x16, [x10, _p7@PAGEOFF] + mov x17, #23130 + autda x16, x17 + + adrp x9, _p8@PAGE + add x9, x9, _p8@PAGEOFF + ldr x16, [x9] + mov x17, x9 + movk x17, #23130, lsl #48 + autda x16, x17 + + mov w0, #0 + ret + + .private_extern _a + .section __DATA,__data + .globl _a + .p2align 3 +_a: + .quad 1 + + .private_extern _b + .globl _b + .p2align 3 +_b: + .quad 2 + + .private_extern _c + .globl _c + .p2align 3 +_c: + .quad 3 + + .private_extern _d + .globl _d + .p2align 3 +_d: + .quad 4 + + .private_extern _e + .globl _e + .p2align 3 +_e: + .quad 5 + + .private_extern _f + .globl _f + .p2align 3 +_f: + .quad 6 + + .private_extern _g + .globl _g + .p2align 3 +_g: + .quad 7 + + .private_extern _h + .globl _h + .p2align 3 +_h: + .quad 8 + + .globl _p1 + .p2align 3 +_p1: + .quad _a@AUTH(ia,0) + + .globl _p2 + .p2align 3 +_p2: + .quad _b@AUTH(ia,0,addr) + + .globl _p3 + .p2align 3 +_p3: + .quad _c@AUTH(ia,23130) + + .globl _p4 + .p2align 3 +_p4: + .quad _d@AUTH(ia,23130,addr) + + .globl _p5 + .p2align 3 +_p5: + .quad _e@AUTH(da,0) + + .globl _p6 + .p2align 3 +_p6: + .quad _f@AUTH(da,0,addr) + + .globl _p7 + .p2align 3 +_p7: + .quad _g@AUTH(da,23130) + + .globl _p8 + .p2align 3 +_p8: + .quad _h@AUTH(da,23130,addr) + +.subsections_via_symbols From 922282eacfc054ddadbec04825d6573179e66200 Mon Sep 17 00:00:00 2001 From: Piyou 
Chen Date: Thu, 21 Nov 2024 12:26:30 +0800 Subject: [PATCH 003/351] [TargetVersion] Only enable on RISC-V and AArch64 (#115991) Address https://github.com/llvm/llvm-project/issues/115000. This patch constrains the target_version feature to work only on RISC-V and AArch64 to prevent crashes in Clang. --------- Co-authored-by: Aaron Ballman --- clang/docs/ReleaseNotes.rst | 2 ++ clang/include/clang/Basic/Attr.td | 2 +- clang/test/Sema/attr-target-version-unsupported.c | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 clang/test/Sema/attr-target-version-unsupported.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 999c88455b64a54..a2ff05438c949af 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -459,6 +459,8 @@ Attribute Changes in Clang - Clang now supports ``[[clang::lifetime_capture_by(X)]]``. Similar to lifetimebound, this can be used to specify when a reference to a function parameter is captured by another capturing entity ``X``. +- The ``target_version`` attribute is now only supported for AArch64 and RISC-V architectures. 
+ Improvements to Clang's diagnostics ----------------------------------- diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 634253d0032560d..f1780fa1067352f 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -3297,7 +3297,7 @@ def Target : InheritableAttr { }]; } -def TargetVersion : InheritableAttr { +def TargetVersion : DeclOrTypeAttr, TargetSpecificAttr> { let Spellings = [GCC<"target_version">]; let Args = [StringArgument<"NamesStr">]; let Subjects = SubjectList<[Function], ErrorDiag>; diff --git a/clang/test/Sema/attr-target-version-unsupported.c b/clang/test/Sema/attr-target-version-unsupported.c new file mode 100644 index 000000000000000..7cf8172f5272e6e --- /dev/null +++ b/clang/test/Sema/attr-target-version-unsupported.c @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s + +//expected-warning@+1 {{unknown attribute 'target_version' ignored}} +int __attribute__((target_version("aes"))) foo(void) { return 3; } From 75b8f98ef69cc43289af4bddfa04e1cf90cc3d86 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Thu, 21 Nov 2024 12:42:20 +0800 Subject: [PATCH 004/351] [SLP] NFC. Change the comment to match the code execution. (#116022) Make code execute like the comment will modify many tests and affect the performance. As a result, we change the comment instead of the code. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 47dcde7d9d1899f..4b661ad40f2d465 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2222,7 +2222,7 @@ class BoUpSLP { MapVector> HashMap; // Try to be closer to the original results, if we have multiple lanes // with same cost. 
If 2 lanes have the same cost, use the one with the - // lowest index. + // highest index. for (int I = getNumLanes(); I > 0; --I) { unsigned Lane = I - 1; OperandsOrderData NumFreeOpsHash = From c4be13cb9c81469060e2018f4e4673440772db03 Mon Sep 17 00:00:00 2001 From: Piyou Chen Date: Thu, 21 Nov 2024 12:48:50 +0800 Subject: [PATCH 005/351] Revert "[TargetVersion] Only enable on RISC-V and AArch64" (#117110) Reverts llvm/llvm-project#115991 Due to build fail https://lab.llvm.org/buildbot/#/builders/66/builds/6511 --- clang/docs/ReleaseNotes.rst | 2 -- clang/include/clang/Basic/Attr.td | 2 +- clang/test/Sema/attr-target-version-unsupported.c | 4 ---- 3 files changed, 1 insertion(+), 7 deletions(-) delete mode 100644 clang/test/Sema/attr-target-version-unsupported.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a2ff05438c949af..999c88455b64a54 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -459,8 +459,6 @@ Attribute Changes in Clang - Clang now supports ``[[clang::lifetime_capture_by(X)]]``. Similar to lifetimebound, this can be used to specify when a reference to a function parameter is captured by another capturing entity ``X``. -- The ``target_version`` attribute is now only supported for AArch64 and RISC-V architectures. 
- Improvements to Clang's diagnostics ----------------------------------- diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index f1780fa1067352f..634253d0032560d 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -3297,7 +3297,7 @@ def Target : InheritableAttr { }]; } -def TargetVersion : DeclOrTypeAttr, TargetSpecificAttr> { +def TargetVersion : InheritableAttr { let Spellings = [GCC<"target_version">]; let Args = [StringArgument<"NamesStr">]; let Subjects = SubjectList<[Function], ErrorDiag>; diff --git a/clang/test/Sema/attr-target-version-unsupported.c b/clang/test/Sema/attr-target-version-unsupported.c deleted file mode 100644 index 7cf8172f5272e6e..000000000000000 --- a/clang/test/Sema/attr-target-version-unsupported.c +++ /dev/null @@ -1,4 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s - -//expected-warning@+1 {{unknown attribute 'target_version' ignored}} -int __attribute__((target_version("aes"))) foo(void) { return 3; } From 197fb270cc2f947bdde047d9aac65b653f4f6f26 Mon Sep 17 00:00:00 2001 From: Sushant Gokhale Date: Thu, 21 Nov 2024 10:23:05 +0530 Subject: [PATCH 006/351] [AArch64][NFC] NFC for const vector as Instruction operand (#116790) Current cost-modelling does not take into account cost of materializing const vector. This results in some cases, as the test shows, being vectorized but this may not always be profitable. Future patch will try to address this issue. 
--- .../materialize-vector-of-consts.ll | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/materialize-vector-of-consts.ll diff --git a/llvm/test/Transforms/SLPVectorizer/materialize-vector-of-consts.ll b/llvm/test/Transforms/SLPVectorizer/materialize-vector-of-consts.ll new file mode 100644 index 000000000000000..2f58bd25b75647c --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/materialize-vector-of-consts.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -mtriple=aarch64 -S %s | FileCheck %s %} + +define <2 x float> @v2f32_diff_consts(float %a, float %b) +; CHECK-LABEL: define <2 x float> @v2f32_diff_consts( +; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[B]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], +; CHECK-NEXT: ret <2 x float> [[TMP3]] +; +{ + %1 = fmul float %a, 22.0 + %2 = fmul float %b, 23.0 + %3 = insertelement <2 x float> poison, float %1, i32 0 + %4 = insertelement <2 x float> %3, float %2, i32 1 + ret <2 x float> %4 +} + +define <2 x float> @v2f32_const_splat(float %a, float %b) +; CHECK-LABEL: define <2 x float> @v2f32_const_splat( +; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[B]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], splat (float 2.200000e+01) +; CHECK-NEXT: ret <2 x float> [[TMP3]] +; +{ + %1 = fmul float %a, 22.0 + %2 = fmul float %b, 22.0 + %3 = insertelement <2 x float> poison, float %1, i32 0 + %4 = insertelement <2 x float> %3, float %2, i32 1 + ret <2 x float> %4 +} + +define <4 x 
double> @v4f64_illegal_type(double %a, double %b, double %c, double %d) +; CHECK-LABEL: define <4 x double> @v4f64_illegal_type( +; CHECK-SAME: double [[A:%.*]], double [[B:%.*]], double [[C:%.*]], double [[D:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> poison, double [[A]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[B]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[C]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[D]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP4]], +; CHECK-NEXT: ret <4 x double> [[TMP5]] +; +{ + %1 = fmul double %a, 21.0 + %2 = fmul double %b, 22.0 + %3 = fmul double %c, 23.0 + %4 = fmul double %d, 24.0 + %5 = insertelement <4 x double> poison, double %1, i32 0 + %6 = insertelement <4 x double> %5, double %2, i32 1 + %7 = insertelement <4 x double> %6, double %3, i32 2 + %8 = insertelement <4 x double> %7, double %4, i32 3 + ret <4 x double> %8 +} + +define <2 x double> @v2f64_dup_const_vector_case1(double %a, double %b, double %c, double %d) +; CHECK-LABEL: define <2 x double> @v2f64_dup_const_vector_case1( +; CHECK-SAME: double [[A:%.*]], double [[B:%.*]], double [[C:%.*]], double [[D:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[D]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]] +; CHECK-NEXT: ret <2 x double> [[TMP7]] +; +{ + %1 = fmul double %a, 21.0 + %2 = fmul double %b, 22.0 + %3 = fmul double %c, 21.0 + %4 = fmul double %d, 22.0 + %5 = insertelement <2 x 
double> poison, double %1, i32 0 + %6 = insertelement <2 x double> %5, double %2, i32 1 + %7 = insertelement <2 x double> poison, double %3, i32 0 + %8 = insertelement <2 x double> %7, double %4, i32 1 + %9 = fadd <2 x double> %6, %8 + ret <2 x double> %9 +} + +define <2 x double> @v2f64_dup_const_vector_case2(double %a, double %b, double %c, double %d) +; CHECK-LABEL: define <2 x double> @v2f64_dup_const_vector_case2( +; CHECK-SAME: double [[A:%.*]], double [[B:%.*]], double [[C:%.*]], double [[D:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], +; CHECK-NEXT: ret <2 x double> [[TMP4]] +; +{ + %1 = fmul double %a, 21.0 + %2 = fmul double %b, 22.0 + %3 = fadd double %1, 21.0 + %4 = fadd double %2, 22.0 + %5 = insertelement <2 x double> poison, double %3, i32 0 + %6 = insertelement <2 x double> %5, double %4, i32 1 + ret <2 x double> %6 +} From 32913724acf9e02beed46999fee1424086b8c884 Mon Sep 17 00:00:00 2001 From: Diego Caballero Date: Wed, 20 Nov 2024 20:57:39 -0800 Subject: [PATCH 007/351] [mlir][vector] Fix 0-d vector transfer mask inference (#116526) When inferring the mask of a transfer operation that results in a single `i1` element, we could represent it using either `vector` or vector<1xi1>. To avoid type mismatches, this PR updates the mask inference logic to consistently generate `vector<1xi1>` for these cases. We can enable 0-D masks if they are needed in the future. 
See: https://github.com/llvm/llvm-project/issues/116197 --- mlir/include/mlir/Dialect/Vector/IR/VectorOps.td | 4 +++- mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 5 +++++ mlir/test/Dialect/Vector/invalid.mlir | 15 +++++++++++++++ mlir/test/Dialect/Vector/ops.mlir | 14 ++++++++++++++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index c5b08d6aa022b1b..cc4cafa869e63af 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -2475,7 +2475,9 @@ def Vector_MaskOp : Vector_Op<"mask", [ should not. The `vector.mask` operation returns the value produced by the masked execution of the nested operation, if any. The masked-off lanes in the result vector are taken from the corresponding lanes of the pass-thru - argument, if provided, or left unmodified, otherwise. + argument, if provided, or left unmodified, otherwise. At this point, 0-D + vectors are not supported by `vector.mask`. They may be supported in the + future. The `vector.mask` operation does not prescribe how a maskable operation should be masked or how a masked operation should be lowered. Masking diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index db199a46e1637c8..1b2f9b7abba5e39 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -4122,6 +4122,11 @@ VectorType mlir::vector::inferTransferOpMaskType(VectorType vecType, assert(invPermMap && "Inversed permutation map couldn't be computed"); SmallVector maskShape = invPermMap.compose(vecType.getShape()); + // The MaskOp specification doesn't support 0-D vectors at the moment. Turn a + // 0-D mask into a single-element 1-D mask. 
+ if (maskShape.empty()) + maskShape.push_back(1); + SmallVector scalableDims = applyPermutationMap(invPermMap, vecType.getScalableDims()); diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index d591c60acb64e71..0c093b0ccff1418 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -1752,6 +1752,21 @@ func.func @vector_mask_non_maskable_op(%a : vector<3x4xf32>) -> vector<3x4xf32> // ----- +func.func @vector_mask_0d_mask(%arg0: tensor<2x4xi32>, + %idx0: index, %idx1: index, + %m0: vector) -> vector<1x1x4xi32> { + %cst = arith.constant 0 : i32 + // expected-error@+1 {{'vector.mask' op operand #0 must be vector of 1-bit signless integer values, but got 'vector'}} + %res = vector.mask %m0 { + %0 = vector.transfer_read %arg0[%idx0, %idx1], %cst {permutation_map = affine_map<(d0, d1) -> (0, 0, 0)>} + : tensor<2x4xi32>, vector<1x1x4xi32> + vector.yield %0 : vector<1x1x4xi32> + } : vector -> vector<1x1x4xi32> + return %res : vector<1x1x4xi32> +} + +// ----- + func.func @vector_scalable_insert_unaligned(%subv: vector<4xi32>, %vec: vector<[16]xi32>) { // expected-error@+1 {{op failed to verify that position is a multiple of the source length.}} %0 = vector.scalable.insert %subv, %vec[2] : vector<4xi32> into vector<[16]xi32> diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir index 3baacba9b612432..04d9ff0546160a3 100644 --- a/mlir/test/Dialect/Vector/ops.mlir +++ b/mlir/test/Dialect/Vector/ops.mlir @@ -1028,6 +1028,20 @@ func.func @vector_mask_empty_return(%m0: vector<16xi1>, %arg0: vector<16xf32>) - return %0 : vector<16xf32> } +// CHECK-LABEL: func @vector_mask_scalar_broadcast_transfer +func.func @vector_mask_scalar_broadcast_transfer(%arg0: tensor<2x4xi32>, + %idx0: index, %idx1: index, + %m0: vector<1xi1>) -> vector<1x1x4xi32> { + %cst = arith.constant 0 : i32 + // CHECK: vector.mask %{{.*}} { vector.transfer_read {{.*}} } : vector<1xi1> -> 
vector<1x1x4xi32> + %res = vector.mask %m0 { + %0 = vector.transfer_read %arg0[%idx0, %idx1], %cst {permutation_map = affine_map<(d0, d1) -> (0, 0, 0)>} + : tensor<2x4xi32>, vector<1x1x4xi32> + vector.yield %0 : vector<1x1x4xi32> + } : vector<1xi1> -> vector<1x1x4xi32> + return %res : vector<1x1x4xi32> +} + // CHECK-LABEL: func @vector_scalable_insert( // CHECK-SAME: %[[SUB0:.*]]: vector<4xi32>, %[[SUB1:.*]]: vector<8xi32>, // CHECK-SAME: %[[SUB2:.*]]: vector<[4]xi32>, %[[SV:.*]]: vector<[8]xi32> From 42775a44c9a6ba8dc03ad4c88fa9321e78ebd434 Mon Sep 17 00:00:00 2001 From: Wu Yingcong Date: Thu, 21 Nov 2024 14:13:08 +0800 Subject: [PATCH 008/351] [ControlHeightReduction] Add assert to avoid underflow (#116339) `NumCHRedBranches - 1` is used later, we should add an assertion to make sure it will not underflow. --- llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index c8ee933913e65a7..b3883cd6e1688e9 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -1862,6 +1862,7 @@ void CHR::fixupBranchesAndSelects(CHRScope *Scope, ++NumCHRedBranches; } } + assert(NumCHRedBranches > 0); Stats.NumBranchesDelta += NumCHRedBranches - 1; Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; ORE.emit([&]() { From dbe159b3f74ea41e16782fe5708756507d4a014f Mon Sep 17 00:00:00 2001 From: donald chen Date: Thu, 21 Nov 2024 14:17:28 +0800 Subject: [PATCH 009/351] [mlir] [IR] Allow zero strides in StridedLayoutAttr (#116463) Disabling memrefs with a stride of 0 was intended to prevent internal aliasing, but this does not address all cases : internal aliasing can still occur when the stride is less than the shape. On the other hand, a stride of 0 can be very useful in certain scenarios. 
For example, in architectures that support multi-dimensional DMA, we can use memref::copy with a stride of 0 to achieve a broadcast effect. This commit removes the restriction that strides in memrefs cannot be 0. --- mlir/lib/IR/BuiltinAttributes.cpp | 4 ---- mlir/lib/IR/BuiltinTypes.cpp | 14 -------------- .../Dialect/Affine/memref-stride-calculation.mlir | 4 ++-- mlir/test/Dialect/MemRef/invalid.mlir | 10 ---------- mlir/test/IR/invalid-builtin-types.mlir | 5 ----- 5 files changed, 2 insertions(+), 35 deletions(-) diff --git a/mlir/lib/IR/BuiltinAttributes.cpp b/mlir/lib/IR/BuiltinAttributes.cpp index 8861a9403361336..f288dd42baaa168 100644 --- a/mlir/lib/IR/BuiltinAttributes.cpp +++ b/mlir/lib/IR/BuiltinAttributes.cpp @@ -245,9 +245,6 @@ AffineMap StridedLayoutAttr::getAffineMap() const { LogicalResult StridedLayoutAttr::verify(function_ref emitError, int64_t offset, ArrayRef strides) { - if (llvm::is_contained(strides, 0)) - return emitError() << "strides must not be zero"; - return success(); } @@ -1815,7 +1812,6 @@ AffineMap mlir::makeStridedLinearLayoutMap(ArrayRef strides, for (const auto &en : llvm::enumerate(strides)) { auto dim = en.index(); auto stride = en.value(); - assert(stride != 0 && "Invalid stride specification"); auto d = getAffineDimExpr(dim, context); AffineExpr mult; // Static case. diff --git a/mlir/lib/IR/BuiltinTypes.cpp b/mlir/lib/IR/BuiltinTypes.cpp index e8e8f3cdfbfd73c..6546234429c8cbe 100644 --- a/mlir/lib/IR/BuiltinTypes.cpp +++ b/mlir/lib/IR/BuiltinTypes.cpp @@ -798,20 +798,6 @@ static LogicalResult getStridesAndOffset(MemRefType t, for (auto &stride : strides) stride = simplifyAffineExpr(stride, numDims, numSymbols); - // In practice, a strided memref must be internally non-aliasing. Test - // against 0 as a proxy. - // TODO: static cases can have more advanced checks. - // TODO: dynamic cases would require a way to compare symbolic - // expressions and would probably need an affine set context propagated - // everywhere. 
- if (llvm::any_of(strides, [](AffineExpr e) { - return e == getAffineConstantExpr(0, e.getContext()); - })) { - offset = AffineExpr(); - strides.clear(); - return failure(); - } - return success(); } diff --git a/mlir/test/Dialect/Affine/memref-stride-calculation.mlir b/mlir/test/Dialect/Affine/memref-stride-calculation.mlir index cce1946b391e7ea..29a5f5e0d5f4402 100644 --- a/mlir/test/Dialect/Affine/memref-stride-calculation.mlir +++ b/mlir/test/Dialect/Affine/memref-stride-calculation.mlir @@ -51,9 +51,9 @@ func.func @f(%0: index) { %26 = memref.alloc(%0)[] : memref(i)>> // CHECK: MemRefType offset: 0 strides: 1 %27 = memref.alloc()[%0] : memref<5xf32, affine_map<(i)[M]->(M)>> -// CHECK: MemRefType memref<5xf32, affine_map<(d0)[s0] -> (s0)>> cannot be converted to strided form +// CHECK: MemRefType offset: ? strides: 0 %28 = memref.alloc()[%0] : memref<5xf32, affine_map<(i)[M]->(123)>> -// CHECK: MemRefType memref<5xf32, affine_map<(d0)[s0] -> (123)>> cannot be converted to strided form +// CHECK: MemRefType offset: 123 strides: 0 %29 = memref.alloc()[%0] : memref(M)>> // CHECK: MemRefType offset: ? 
strides: %30 = memref.alloc()[%0] : memref(123)>> diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir index 51c4781c9022b25..f72ad48245f8193 100644 --- a/mlir/test/Dialect/MemRef/invalid.mlir +++ b/mlir/test/Dialect/MemRef/invalid.mlir @@ -245,16 +245,6 @@ func.func @memref_reinterpret_cast_no_map_but_strides(%in: memref) { // ----- -func.func @memref_reinterpret_cast_non_strided_layout(%in: memref) { - // expected-error @+1 {{expected result type to have strided layout but found 'memref<9x10xf32, affine_map<(d0, d1) -> (d0)>>}} - %out = memref.reinterpret_cast %in to - offset: [0], sizes: [9, 10], strides: [42, 1] - : memref to memref<9x10xf32, affine_map<(d0, d1) -> (d0)>> - return -} - -// ----- - func.func @memref_reshape_element_type_mismatch( %buf: memref<*xf32>, %shape: memref<1xi32>) { // expected-error @+1 {{element types of source and destination memref types should be the same}} diff --git a/mlir/test/IR/invalid-builtin-types.mlir b/mlir/test/IR/invalid-builtin-types.mlir index 07854a25000feb5..51612446d2e6a67 100644 --- a/mlir/test/IR/invalid-builtin-types.mlir +++ b/mlir/test/IR/invalid-builtin-types.mlir @@ -99,11 +99,6 @@ func.func private @memref_incorrect_strided_ending() -> memref memref> - -// ----- - // expected-error @below {{expected the number of strides to match the rank}} func.func private @memref_strided_rank_mismatch() -> memref> From e9c561e93434a5d0cbc274b7efd73d6e252b6ba4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 20 Nov 2024 21:44:53 -0800 Subject: [PATCH 010/351] [RISCV][GISel] Add atomic load/store test. Add additional atomic load/store isel patterns." 
--- llvm/lib/Target/RISCV/RISCVGISel.td | 13 + .../RISCV/GlobalISel/atomic-load-store.ll | 1678 +++++++++++++++++ 2 files changed, 1691 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store.ll diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index 9fd4400b97b23b6..9670e4b07068a2f 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -177,6 +177,19 @@ def : StPat; def : LdPat; // Prefer unsigned due to no c.lb in Zcb. def : StPat; +let Predicates = [HasAtomicLdSt] in { + def : LdPat; + def : LdPat; + + def : StPat; + def : StPat; +} + +let Predicates = [HasAtomicLdSt, IsRV64] in { + def : LdPat; + def : StPat; +} + //===----------------------------------------------------------------------===// // RV64 i32 patterns not used by SelectionDAG //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store.ll new file mode 100644 index 000000000000000..9a1ed8f115b35da --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store.ll @@ -0,0 +1,1678 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+a,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+a,+ztso,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s +; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+a,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck 
-check-prefixes=RV64IA,RV64IA-WMO %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+a,+ztso,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s + + +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+a,+ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s + +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+a,+ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s + + +define i8 @atomic_load_i8_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i8_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i8_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lb a0, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i8_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i8_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lb a0, 0(a0) +; RV64IA-NEXT: ret + %1 = load atomic i8, ptr %a unordered, align 1 + ret i8 %1 +} + +define i8 @atomic_load_i8_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i8_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte 
Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i8_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lb a0, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i8_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i8_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lb a0, 0(a0) +; RV64IA-NEXT: ret + %1 = load atomic i8, ptr %a monotonic, align 1 + ret i8 %1 +} + +define i8 @atomic_load_i8_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i8_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i8_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lb a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i8_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lb a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i8_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i8_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lb a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i8_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lb a0, 0(a0) +; 
RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic i8, ptr %a acquire, align 1 + ret i8 %1 +} + +define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i8_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, rw +; RV32IA-WMO-NEXT: lb a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i8_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: lb a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i8_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: 
atomic_load_i8_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: lb a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i8_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: lb a0, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic i8, ptr %a seq_cst, align 1 + ret i8 %1 +} + +define i16 @atomic_load_i16_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i16_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i16_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lh a0, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i16_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: 
addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i16_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lh a0, 0(a0) +; RV64IA-NEXT: ret + %1 = load atomic i16, ptr %a unordered, align 2 + ret i16 %1 +} + +define i16 @atomic_load_i16_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i16_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i16_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lh a0, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i16_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i16_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lh a0, 0(a0) +; RV64IA-NEXT: ret + %1 = load atomic i16, ptr %a monotonic, align 2 + ret i16 %1 +} + +define i16 @atomic_load_i16_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i16_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i16_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lh a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i16_acquire: +; RV32IA-TSO: # %bb.0: +; 
RV32IA-TSO-NEXT: lh a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i16_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i16_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lh a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i16_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lh a0, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic i16, ptr %a acquire, align 2 + ret i16 %1 +} + +define i16 @atomic_load_i16_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i16_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst: +; RV32IA-WMO: # 
%bb.0: +; RV32IA-WMO-NEXT: fence rw, rw +; RV32IA-WMO-NEXT: lh a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i16_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: lh a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i16_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: lh a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i16_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: lh a0, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: 
ret + %1 = load atomic i16, ptr %a seq_cst, align 2 + ret i16 %1 +} + +define i32 @atomic_load_i32_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i32_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i32_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i32_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i32_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lw a0, 0(a0) +; RV64IA-NEXT: ret + %1 = load atomic i32, ptr %a unordered, align 4 + ret i32 %1 +} + +define i32 @atomic_load_i32_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i32_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i32_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i32_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i32_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lw a0, 0(a0) +; RV64IA-NEXT: ret + %1 = load atomic i32, ptr %a monotonic, align 4 + 
ret i32 %1 +} + +define i32 @atomic_load_i32_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i32_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i32_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lw a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i32_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i32_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i32_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lw a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i32_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire: +; 
RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic i32, ptr %a acquire, align 4 + ret i32 %1 +} + +define i32 @atomic_load_i32_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i32_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i32_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, rw +; RV32IA-WMO-NEXT: lw a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i32_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i32_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: lw a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i32_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: 
fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic i32, ptr %a seq_cst, align 4 + ret i32 %1 +} + +define i64 @atomic_load_i64_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i64_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i64_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 0 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i64_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i64_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: ld a0, 0(a0) +; RV64IA-NEXT: ret + %1 = load atomic i64, ptr %a unordered, align 8 + ret i64 %1 +} + +define i64 @atomic_load_i64_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i64_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: 
addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i64_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 0 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i64_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i64_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: ld a0, 0(a0) +; RV64IA-NEXT: ret + %1 = load atomic i64, ptr %a monotonic, align 8 + ret i64 %1 +} + +define i64 @atomic_load_i64_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i64_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i64_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 2 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i64_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, 
sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i64_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: ld a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i64_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic i64, ptr %a acquire, align 8 + ret i64 %1 +} + +define i64 @atomic_load_i64_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i64_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i64_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 5 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i64_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i64_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: ld a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; 
RV64IA-TSO-LABEL: atomic_load_i64_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic i64, ptr %a seq_cst, align 8 + ret i64 %1 +} + +define void @atomic_store_i8_unordered(ptr %a, i8 %b) nounwind { +; RV32I-LABEL: atomic_store_i8_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __atomic_store_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_i8_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: sb a1, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_i8_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_i8_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: sb a1, 0(a0) +; RV64IA-NEXT: ret + store atomic i8 %b, ptr %a unordered, align 1 + ret void +} + +define void @atomic_store_i8_monotonic(ptr %a, i8 %b) nounwind { +; RV32I-LABEL: atomic_store_i8_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __atomic_store_1 +; 
RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_i8_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: sb a1, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_i8_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_i8_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: sb a1, 0(a0) +; RV64IA-NEXT: ret + store atomic i8 %b, ptr %a monotonic, align 1 + ret void +} + +define void @atomic_store_i8_release(ptr %a, i8 %b) nounwind { +; RV32I-LABEL: atomic_store_i8_release: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 3 +; RV32I-NEXT: call __atomic_store_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_store_i8_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, w +; RV32IA-WMO-NEXT: sb a1, 0(a0) +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_store_i8_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sb a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_store_i8_release: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 3 +; RV64I-NEXT: call __atomic_store_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_i8_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: sb a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i8_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sb a1, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; 
RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i8_release: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV32IA-WMO-TRAILING-FENCE-NEXT: sb a1, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i8_release: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: sb a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i8_release: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: sb a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i8_release: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: sb a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic i8 %b, ptr %a release, align 1 + ret void +} + +define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind { +; RV32I-LABEL: atomic_store_i8_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 5 +; RV32I-NEXT: call __atomic_store_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_store_i8_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, w +; RV32IA-WMO-NEXT: sb a1, 0(a0) +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_store_i8_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sb a1, 0(a0) +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_store_i8_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_store_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_i8_seq_cst: +; RV64IA-WMO: # %bb.0: +; 
RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: sb a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i8_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sb a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV32IA-WMO-TRAILING-FENCE-NEXT: sb a1, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: sb a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: sb a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i8_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: sb a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic i8 %b, ptr %a seq_cst, align 1 + ret void +} + +define void @atomic_store_i16_unordered(ptr %a, i16 %b) nounwind { +; RV32I-LABEL: atomic_store_i16_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __atomic_store_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_i16_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: sh a1, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_i16_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill 
+; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_i16_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: sh a1, 0(a0) +; RV64IA-NEXT: ret + store atomic i16 %b, ptr %a unordered, align 2 + ret void +} + +define void @atomic_store_i16_monotonic(ptr %a, i16 %b) nounwind { +; RV32I-LABEL: atomic_store_i16_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __atomic_store_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_i16_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: sh a1, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_i16_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_i16_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: sh a1, 0(a0) +; RV64IA-NEXT: ret + store atomic i16 %b, ptr %a monotonic, align 2 + ret void +} + +define void @atomic_store_i16_release(ptr %a, i16 %b) nounwind { +; RV32I-LABEL: atomic_store_i16_release: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 3 +; RV32I-NEXT: call __atomic_store_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_store_i16_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, w +; RV32IA-WMO-NEXT: sh a1, 0(a0) +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_store_i16_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sh a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; 
+; RV64I-LABEL: atomic_store_i16_release: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 3 +; RV64I-NEXT: call __atomic_store_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_i16_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: sh a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i16_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sh a1, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i16_release: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV32IA-WMO-TRAILING-FENCE-NEXT: sh a1, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i16_release: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: sh a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i16_release: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: sh a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i16_release: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: sh a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic i16 %b, ptr %a release, align 2 + ret void +} + +define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind { +; RV32I-LABEL: atomic_store_i16_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 5 +; RV32I-NEXT: call __atomic_store_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_store_i16_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, w 
+; RV32IA-WMO-NEXT: sh a1, 0(a0) +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_store_i16_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sh a1, 0(a0) +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_store_i16_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_store_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_i16_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: sh a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i16_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sh a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV32IA-WMO-TRAILING-FENCE-NEXT: sh a1, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: sh a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: sh a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i16_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: sh a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic i16 %b, ptr %a seq_cst, align 2 + ret void +} + +define void 
@atomic_store_i32_unordered(ptr %a, i32 %b) nounwind { +; RV32I-LABEL: atomic_store_i32_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_i32_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: sw a1, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_i32_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_i32_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: sw a1, 0(a0) +; RV64IA-NEXT: ret + store atomic i32 %b, ptr %a unordered, align 4 + ret void +} + +define void @atomic_store_i32_monotonic(ptr %a, i32 %b) nounwind { +; RV32I-LABEL: atomic_store_i32_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_i32_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: sw a1, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_i32_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_i32_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: sw a1, 0(a0) +; RV64IA-NEXT: ret + store atomic i32 %b, ptr %a monotonic, align 4 + ret void +} + +define void 
@atomic_store_i32_release(ptr %a, i32 %b) nounwind { +; RV32I-LABEL: atomic_store_i32_release: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 3 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_store_i32_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, w +; RV32IA-WMO-NEXT: sw a1, 0(a0) +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_store_i32_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sw a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_store_i32_release: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 3 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_i32_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: sw a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i32_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sw a1, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i32_release: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV32IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i32_release: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i32_release: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i32_release: +; 
RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic i32 %b, ptr %a release, align 4 + ret void +} + +define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind { +; RV32I-LABEL: atomic_store_i32_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 5 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_store_i32_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, w +; RV32IA-WMO-NEXT: sw a1, 0(a0) +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_store_i32_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sw a1, 0(a0) +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_store_i32_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_i32_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: sw a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i32_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sw a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV32IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence 
rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i32_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic i32 %b, ptr %a seq_cst, align 4 + ret void +} + +define void @atomic_store_i64_unordered(ptr %a, i64 %b) nounwind { +; RV32I-LABEL: atomic_store_i64_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_i64_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a3, 0 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_i64_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_i64_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: sd a1, 0(a0) +; RV64IA-NEXT: ret + store atomic i64 %b, ptr %a unordered, align 8 + ret void +} + +define void @atomic_store_i64_monotonic(ptr %a, i64 %b) nounwind { +; RV32I-LABEL: atomic_store_i64_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; 
RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_i64_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a3, 0 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_i64_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_i64_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: sd a1, 0(a0) +; RV64IA-NEXT: ret + store atomic i64 %b, ptr %a monotonic, align 8 + ret void +} + +define void @atomic_store_i64_release(ptr %a, i64 %b) nounwind { +; RV32I-LABEL: atomic_store_i64_release: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a3, 3 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_i64_release: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a3, 3 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_i64_release: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 3 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, 
sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_i64_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: sd a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i64_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sd a1, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i64_release: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i64_release: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic i64 %b, ptr %a release, align 8 + ret void +} + +define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind { +; RV32I-LABEL: atomic_store_i64_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_i64_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a3, 5 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_i64_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_i64_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: sd a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: 
atomic_store_i64_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sd a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_i64_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_i64_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic i64 %b, ptr %a seq_cst, align 8 + ret void +} From 476b208e0115e766605e9f850982996a1d51c287 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Thu, 21 Nov 2024 07:54:38 +0100 Subject: [PATCH 011/351] [clang][bytecode] Fix ToType/FromType diagnostic ordering (#116988) We need to check the ToType first, then the FromType. Additionally, remove qualifiers from the parent type of the field we're emitting a note for. 
--- clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp | 7 +++---- clang/test/AST/ByteCode/builtin-bit-cast.cpp | 11 +++++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp index 7e8853d34693172..b1230f92ddf1d49 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp @@ -254,7 +254,7 @@ static bool CheckBitcastType(InterpState &S, CodePtr OpPC, QualType T, }; auto note = [&](int Construct, QualType NoteType, SourceRange NoteRange) { S.Note(NoteRange.getBegin(), diag::note_constexpr_bit_cast_invalid_subtype) - << NoteType << Construct << T << NoteRange; + << NoteType << Construct << T.getUnqualifiedType() << NoteRange; return false; }; @@ -388,11 +388,10 @@ bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC, QualType FromType = FromPtr.getType(); QualType ToType = ToPtr.getType(); - if (!CheckBitcastType(S, OpPC, FromType, /*IsToType=*/false)) - return false; - if (!CheckBitcastType(S, OpPC, ToType, /*IsToType=*/true)) return false; + if (!CheckBitcastType(S, OpPC, FromType, /*IsToType=*/false)) + return false; BitcastBuffer Buffer; readPointerToBuffer(S.getContext(), FromPtr, Buffer, diff --git a/clang/test/AST/ByteCode/builtin-bit-cast.cpp b/clang/test/AST/ByteCode/builtin-bit-cast.cpp index 60e8c3a615c5e63..3c6cd0aa36e7fe1 100644 --- a/clang/test/AST/ByteCode/builtin-bit-cast.cpp +++ b/clang/test/AST/ByteCode/builtin-bit-cast.cpp @@ -145,6 +145,17 @@ namespace Fail { // both-note {{initializer of 'a' is not a constant expression}} } +namespace ToPtr { + struct S { + const int *p = nullptr; + }; + struct P { + const int *p; // both-note {{invalid type 'const int *' is a member of 'ToPtr::P'}} + }; + constexpr P p = __builtin_bit_cast(P, S{}); // both-error {{must be initialized by a constant expression}} \ + // both-note {{bit_cast to a pointer type is not allowed in a constant 
expression}} +} + namespace NullPtr { constexpr nullptr_t N = __builtin_bit_cast(nullptr_t, (intptr_t)1u); static_assert(N == nullptr); From 7c0786363e6b14e05a868cfe7614074cf742e7cc Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 21 Nov 2024 18:14:10 +1100 Subject: [PATCH 012/351] [ORC-RT] Test basic C++ static initialization support in the ORC runtime. This tests that a simple C++ static initializer works as expected. Compared to the architecture specific, assembly level regression tests for the ORC runtime; this test is expected to catch cases where the compiler adopts some new MachO feature that the ORC runtime does not yet support (e.g. a new initializer section). --- .../Darwin/Generic/trivial-cxx-constructor.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 compiler-rt/test/orc/TestCases/Darwin/Generic/trivial-cxx-constructor.cpp diff --git a/compiler-rt/test/orc/TestCases/Darwin/Generic/trivial-cxx-constructor.cpp b/compiler-rt/test/orc/TestCases/Darwin/Generic/trivial-cxx-constructor.cpp new file mode 100644 index 000000000000000..c3c9cad6342bc6e --- /dev/null +++ b/compiler-rt/test/orc/TestCases/Darwin/Generic/trivial-cxx-constructor.cpp @@ -0,0 +1,17 @@ +// RUN: %clangxx -c -o %t %s +// RUN: %llvm_jitlink %t +// +// REQUIRES: system-darwin && host-arch-compatible + +static int x = 1; + +class Init { +public: + Init() { x = 0; } +}; + +static Init I; + +int main(int argc, char *argv[]) { + return x; +} From a6fefc82450e054336a52a5d2d915b780b8c3ef7 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Thu, 21 Nov 2024 15:33:18 +0800 Subject: [PATCH 013/351] [InstCombine] Convert logical and/or with `icmp samesign` into bitwise ops (#116983) See the following case: ``` define i1 @test_logical_and_icmp_samesign(i8 %x) { %cmp1 = icmp ne i8 %x, 9 %cmp2 = icmp samesign ult i8 %x, 11 %and = select i1 %cmp1, i1 %cmp2, i1 false ret i1 %and } ``` Currently we cannot convert this logical and into a bitwise and due to the `samesign` 
flag. But if `%cmp2` evaluates to `poison`, we can infer that `%cmp1` is either `poison` or `true` (`samesign` violation indicates that X is negative). Therefore, `%and` still evaluates to `poison`. This patch converts a logical and into a bitwise and iff TV is poison implies that Cond is either poison or true. Likewise, we convert a logical or into a bitwise or iff FV is poison implies that Cond is either poison or false. Note: 1. This logic is implemented in InstCombine. Not sure whether it is profitable to move it into ValueTracking and call `impliesPoison(TV/FV, Sel)` instead. 2. We only handle the case that `ValAssumedPoison` is `icmp samesign pred X, C1` and `V` is `icmp pred X, C2`. There are no suitable variants for `isImpliedCondition` to pass the fact that X is [non-]negative. Alive2: https://alive2.llvm.org/ce/z/eorFfa Motivation: fix [a major regression](https://github.com/dtcxzyw/llvm-opt-benchmark/pull/1724#discussion_r1849663863) to unblock https://github.com/llvm/llvm-project/pull/112742. --- .../InstCombine/InstCombineSelect.cpp | 41 +++++- .../Transforms/InstCombine/logical-select.ll | 118 ++++++++++++++++++ 2 files changed, 155 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 2526ce7704ab121..e5525133e5dbb5c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3112,6 +3112,39 @@ static Instruction *foldNestedSelects(SelectInst &OuterSelVal, !IsAndVariant ? SelInner : InnerSel.FalseVal); } +/// Return true if V is poison or \p Expected given that ValAssumedPoison is +/// already poison. For example, if ValAssumedPoison is `icmp samesign X, 10` +/// and V is `icmp ne X, 5`, impliesPoisonOrCond returns true. 
+static bool impliesPoisonOrCond(const Value *ValAssumedPoison, const Value *V, + bool Expected) { + if (impliesPoison(ValAssumedPoison, V)) + return true; + + // Handle the case that ValAssumedPoison is `icmp samesign pred X, C1` and V + // is `icmp pred X, C2`, where C1 is well-defined. + if (auto *ICmp = dyn_cast(ValAssumedPoison)) { + Value *LHS = ICmp->getOperand(0); + const APInt *RHSC1; + const APInt *RHSC2; + ICmpInst::Predicate Pred; + if (ICmp->hasSameSign() && + match(ICmp->getOperand(1), m_APIntForbidPoison(RHSC1)) && + match(V, m_ICmp(Pred, m_Specific(LHS), m_APIntAllowPoison(RHSC2)))) { + unsigned BitWidth = RHSC1->getBitWidth(); + ConstantRange CRX = + RHSC1->isNonNegative() + ? ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt::getZero(BitWidth)) + : ConstantRange(APInt::getZero(BitWidth), + APInt::getSignedMinValue(BitWidth)); + return CRX.icmp(Expected ? Pred : ICmpInst::getInversePredicate(Pred), + *RHSC2); + } + } + + return false; +} + Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); @@ -3133,13 +3166,13 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { // checks whether folding it does not convert a well-defined value into // poison. 
if (match(TrueVal, m_One())) { - if (impliesPoison(FalseVal, CondVal)) { + if (impliesPoisonOrCond(FalseVal, CondVal, /*Expected=*/false)) { // Change: A = select B, true, C --> A = or B, C return BinaryOperator::CreateOr(CondVal, FalseVal); } if (match(CondVal, m_OneUse(m_Select(m_Value(A), m_One(), m_Value(B)))) && - impliesPoison(FalseVal, B)) { + impliesPoisonOrCond(FalseVal, B, /*Expected=*/false)) { // (A || B) || C --> A || (B | C) return replaceInstUsesWith( SI, Builder.CreateLogicalOr(A, Builder.CreateOr(B, FalseVal))); @@ -3175,13 +3208,13 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { } if (match(FalseVal, m_Zero())) { - if (impliesPoison(TrueVal, CondVal)) { + if (impliesPoisonOrCond(TrueVal, CondVal, /*Expected=*/true)) { // Change: A = select B, C, false --> A = and B, C return BinaryOperator::CreateAnd(CondVal, TrueVal); } if (match(CondVal, m_OneUse(m_Select(m_Value(A), m_Value(B), m_Zero()))) && - impliesPoison(TrueVal, B)) { + impliesPoisonOrCond(TrueVal, B, /*Expected=*/true)) { // (A && B) && C --> A && (B & C) return replaceInstUsesWith( SI, Builder.CreateLogicalAnd(A, Builder.CreateAnd(B, TrueVal))); diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll index 1b6e816d2e624ea..050a53406a9c597 100644 --- a/llvm/test/Transforms/InstCombine/logical-select.ll +++ b/llvm/test/Transforms/InstCombine/logical-select.ll @@ -1521,3 +1521,121 @@ bb: %and2 = or i1 %and1, %cmp ret i1 %and2 } + +define i1 @test_logical_and_icmp_samesign(i8 %x) { +; CHECK-LABEL: @test_logical_and_icmp_samesign( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i8 [[X:%.*]], 9 +; CHECK-NEXT: [[CMP2:%.*]] = icmp samesign ult i8 [[X]], 11 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[AND]] +; + %cmp1 = icmp ne i8 %x, 9 + %cmp2 = icmp samesign ult i8 %x, 11 + %and = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %and +} + +define i1 @test_logical_or_icmp_samesign(i8 %x) { +; 
CHECK-LABEL: @test_logical_or_icmp_samesign( +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[X:%.*]], -9 +; CHECK-NEXT: [[CMP2:%.*]] = icmp samesign ult i8 [[X]], -11 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[OR]] +; + %cmp1 = icmp eq i8 %x, -9 + %cmp2 = icmp samesign ult i8 %x, -11 + %or = select i1 %cmp1, i1 true, i1 %cmp2 + ret i1 %or +} + +define i1 @test_double_logical_and_icmp_samesign1(i1 %cond, i32 %y) { +; CHECK-LABEL: @test_double_logical_and_icmp_samesign1( +; CHECK-NEXT: [[CMP2:%.*]] = icmp samesign ult i32 [[Y:%.*]], 4 +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[SEL1:%.*]], i1 [[CMP2]], i1 false +; CHECK-NEXT: ret i1 [[SEL2]] +; + %cmp1 = icmp ne i32 %y, 5 + %sel1 = select i1 %cond, i1 %cmp1, i1 false + %cmp2 = icmp samesign ult i32 %y, 4 + %sel2 = select i1 %sel1, i1 %cmp2, i1 false + ret i1 %sel2 +} + +define i1 @test_double_logical_and_icmp_samesign2(i1 %cond, i32 %y) { +; CHECK-LABEL: @test_double_logical_and_icmp_samesign2( +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[Y:%.*]], -65536 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[TMP1]], 1048576 +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[SEL1:%.*]], i1 [[CMP2]], i1 false +; CHECK-NEXT: ret i1 [[SEL2]] +; + %cmp1 = icmp samesign ugt i32 %y, 65535 + %sel1 = select i1 %cond, i1 %cmp1, i1 false + %cmp2 = icmp samesign ult i32 %y, 1114112 + %sel2 = select i1 %sel1, i1 %cmp2, i1 false + ret i1 %sel2 +} + +define <2 x i1> @test_logical_and_icmp_samesign_vec(<2 x i8> %x) { +; CHECK-LABEL: @test_logical_and_icmp_samesign_vec( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i8> [[X:%.*]], splat (i8 9) +; CHECK-NEXT: [[CMP2:%.*]] = icmp samesign ult <2 x i8> [[X]], splat (i8 11) +; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret <2 x i1> [[AND]] +; + %cmp1 = icmp ne <2 x i8> %x, splat(i8 9) + %cmp2 = icmp samesign ult <2 x i8> %x, splat(i8 11) + %and = select <2 x i1> %cmp1, <2 x i1> %cmp2, <2 x i1> zeroinitializer + ret <2 x i1> %and +} + +define <2 x i1> 
@test_logical_and_icmp_samesign_vec_with_poison_cond(<2 x i8> %x) { +; CHECK-LABEL: @test_logical_and_icmp_samesign_vec_with_poison_cond( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP2:%.*]] = icmp samesign ult <2 x i8> [[X]], splat (i8 11) +; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret <2 x i1> [[AND]] +; + %cmp1 = icmp ne <2 x i8> %x, + %cmp2 = icmp samesign ult <2 x i8> %x, splat(i8 11) + %and = select <2 x i1> %cmp1, <2 x i1> %cmp2, <2 x i1> zeroinitializer + ret <2 x i1> %and +} + +define i1 @test_logical_and_icmp_samesign_do_not_imply(i8 %x) { +; CHECK-LABEL: @test_logical_and_icmp_samesign_do_not_imply( +; CHECK-NEXT: [[AND:%.*]] = icmp ult i8 [[X:%.*]], 11 +; CHECK-NEXT: ret i1 [[AND]] +; + %cmp1 = icmp ne i8 %x, -9 + %cmp2 = icmp samesign ult i8 %x, 11 + %and = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %and +} + +define i1 @test_logical_and_icmp_no_samesign(i8 %x) { +; CHECK-LABEL: @test_logical_and_icmp_no_samesign( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i8 [[X:%.*]], 9 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[X]], 11 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[AND]] +; + %cmp1 = icmp ne i8 %x, 9 + %cmp2 = icmp ult i8 %x, 11 + %and = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %and +} + +; Negative tests + +define <2 x i1> @test_logical_and_icmp_samesign_vec_with_poison_tv(<2 x i8> %x) { +; CHECK-LABEL: @test_logical_and_icmp_samesign_vec_with_poison_tv( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i8> [[X:%.*]], splat (i8 9) +; CHECK-NEXT: [[CMP2:%.*]] = icmp samesign ult <2 x i8> [[X]], +; CHECK-NEXT: [[AND:%.*]] = select <2 x i1> [[CMP1]], <2 x i1> [[CMP2]], <2 x i1> zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[AND]] +; + %cmp1 = icmp ne <2 x i8> %x, splat(i8 9) + %cmp2 = icmp samesign ult <2 x i8> %x, + %and = select <2 x i1> %cmp1, <2 x i1> %cmp2, <2 x i1> zeroinitializer + ret <2 x i1> %and +} From 97b2903455fbe2de0c88cf07b92a09dc8cb7e699 Mon 
Sep 17 00:00:00 2001 From: Mingming Liu Date: Wed, 20 Nov 2024 23:44:18 -0800 Subject: [PATCH 014/351] [NFCI][WPD]Use unique string saver to store type id (#106932) Currently, both [TypeIdMap](https://github.com/llvm/llvm-project/blob/67a1fdb014790a38a205d28e1748634de34471dd/llvm/include/llvm/IR/ModuleSummaryIndex.h#L1356) and [TypeIdCompatibleVtableMap](https://github.com/llvm/llvm-project/blob/67a1fdb014790a38a205d28e1748634de34471dd/llvm/include/llvm/IR/ModuleSummaryIndex.h#L1363) keep type-id as `std::string` in the combined index for LTO indexing analysis. With this change, index uses a unique-string-saver to own the string copies and two maps above can use string references to save some memory. This shows a 3% memory reduction (from 8.2GiB to 7.9GiB) in an internal binary with high indexing memory usage. --- llvm/include/llvm/IR/ModuleSummaryIndex.h | 18 ++++++++++------- llvm/include/llvm/IR/ModuleSummaryIndexYAML.h | 20 ++++++++++++++++--- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 4 ++-- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 39c60229aa1d81c..a4eb75ceb6930fb 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -1315,7 +1315,7 @@ using GVSummaryPtrSet = std::unordered_set; /// Map of a type GUID to type id string and summary (multimap used /// in case of GUID conflicts). using TypeIdSummaryMapTy = - std::multimap>; + std::multimap>; /// The following data structures summarize type metadata information. /// For type metadata overview see https://llvm.org/docs/TypeMetadata.html. @@ -1351,6 +1351,9 @@ class ModuleSummaryIndex { /// Holds strings for combined index, mapping to the corresponding module ID. 
ModulePathStringTableTy ModulePathStringTable; + BumpPtrAllocator TypeIdSaverAlloc; + UniqueStringSaver TypeIdSaver; + /// Mapping from type identifier GUIDs to type identifier and its summary /// information. Produced by thin link. TypeIdSummaryMapTy TypeIdMap; @@ -1359,7 +1362,7 @@ class ModuleSummaryIndex { /// with that type identifier's metadata. Produced by per module summary /// analysis and consumed by thin link. For more information, see description /// above where TypeIdCompatibleVtableInfo is defined. - std::map> + std::map> TypeIdCompatibleVtableMap; /// Mapping from original ID to GUID. If original ID can map to multiple @@ -1455,8 +1458,9 @@ class ModuleSummaryIndex { // See HaveGVs variable comment. ModuleSummaryIndex(bool HaveGVs, bool EnableSplitLTOUnit = false, bool UnifiedLTO = false) - : HaveGVs(HaveGVs), EnableSplitLTOUnit(EnableSplitLTOUnit), - UnifiedLTO(UnifiedLTO), Saver(Alloc) {} + : TypeIdSaver(TypeIdSaverAlloc), HaveGVs(HaveGVs), + EnableSplitLTOUnit(EnableSplitLTOUnit), UnifiedLTO(UnifiedLTO), + Saver(Alloc) {} // Current version for the module summary in bitcode files. // The BitcodeSummaryVersion should be bumped whenever we introduce changes @@ -1829,8 +1833,8 @@ class ModuleSummaryIndex { for (auto &[GUID, TypeIdPair] : make_range(TidIter)) if (TypeIdPair.first == TypeId) return TypeIdPair.second; - auto It = TypeIdMap.insert( - {GlobalValue::getGUID(TypeId), {std::string(TypeId), TypeIdSummary()}}); + auto It = TypeIdMap.insert({GlobalValue::getGUID(TypeId), + {TypeIdSaver.save(TypeId), TypeIdSummary()}}); return It->second.second; } @@ -1859,7 +1863,7 @@ class ModuleSummaryIndex { /// the ThinLTO backends. 
TypeIdCompatibleVtableInfo & getOrInsertTypeIdCompatibleVtableSummary(StringRef TypeId) { - return TypeIdCompatibleVtableMap[std::string(TypeId)]; + return TypeIdCompatibleVtableMap[TypeIdSaver.save(TypeId)]; } /// For the given \p TypeId, this returns the TypeIdCompatibleVtableMap diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h index 7c405025630c95f..b23fd4a72c93b65 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -313,11 +313,11 @@ template <> struct CustomMappingTraits { static void inputOne(IO &io, StringRef Key, TypeIdSummaryMapTy &V) { TypeIdSummary TId; io.mapRequired(Key.str().c_str(), TId); - V.insert({GlobalValue::getGUID(Key), {std::string(Key), TId}}); + V.insert({GlobalValue::getGUID(Key), {Key, TId}}); } static void output(IO &io, TypeIdSummaryMapTy &V) { for (auto &TidIter : V) - io.mapRequired(TidIter.second.first.c_str(), TidIter.second.second); + io.mapRequired(TidIter.second.first.str().c_str(), TidIter.second.second); } }; @@ -327,7 +327,21 @@ template <> struct MappingTraits { if (!io.outputting()) CustomMappingTraits::fixAliaseeLinks( index.GlobalValueMap); - io.mapOptional("TypeIdMap", index.TypeIdMap); + + if (io.outputting()) { + io.mapOptional("TypeIdMap", index.TypeIdMap); + } else { + TypeIdSummaryMapTy TypeIdMap; + io.mapOptional("TypeIdMap", TypeIdMap); + for (auto &[TypeGUID, TypeIdSummaryMap] : TypeIdMap) { + // Save type id references in index and point TypeIdMap to use the + // references owned by index. 
+ StringRef KeyRef = index.TypeIdSaver.save(TypeIdSummaryMap.first); + index.TypeIdMap.insert( + {TypeGUID, {KeyRef, std::move(TypeIdSummaryMap.second)}}); + } + } + io.mapOptional("WithGlobalValueDeadStripping", index.WithGlobalValueDeadStripping); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 80e12bef502ace9..59e070a5110620c 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4165,7 +4165,7 @@ static void writeWholeProgramDevirtResolution( static void writeTypeIdSummaryRecord(SmallVector &NameVals, StringTableBuilder &StrtabBuilder, - const std::string &Id, + StringRef Id, const TypeIdSummary &Summary) { NameVals.push_back(StrtabBuilder.add(Id)); NameVals.push_back(Id.size()); @@ -4184,7 +4184,7 @@ static void writeTypeIdSummaryRecord(SmallVector &NameVals, static void writeTypeIdCompatibleVtableSummaryRecord( SmallVector &NameVals, StringTableBuilder &StrtabBuilder, - const std::string &Id, const TypeIdCompatibleVtableInfo &Summary, + StringRef Id, const TypeIdCompatibleVtableInfo &Summary, ValueEnumerator &VE) { NameVals.push_back(StrtabBuilder.add(Id)); NameVals.push_back(Id.size()); From abb9f9fa06ef22be2b0287b9047d5cfed71d91d4 Mon Sep 17 00:00:00 2001 From: Lee Wei Date: Thu, 21 Nov 2024 01:06:56 -0700 Subject: [PATCH 015/351] [llvm] Remove `br i1 undef` from some regression tests [NFC] (#117112) This PR removes tests with `br i1 undef` under `llvm/tests/Transforms/Loop*, Lower*`. 
--- llvm/test/Transforms/LoopInterchange/lcssa.ll | 12 ++-- .../pr43176-move-to-new-latch.ll | 4 +- ...r43473-invalid-lcssa-phis-in-inner-exit.ll | 20 +++--- ...97-lcssa-for-multiple-outer-loop-blocks.ll | 8 +-- .../Transforms/LoopInterchange/pr57148.ll | 4 +- llvm/test/Transforms/LoopLoadElim/pr-48150.ll | 4 +- llvm/test/Transforms/LoopLoadElim/pr47457.ll | 6 +- .../LoopPredication/predicate-exits.ll | 2 +- llvm/test/Transforms/LoopRotate/crash.ll | 14 ++-- .../Transforms/LoopRotate/multiple-exits.ll | 8 +-- llvm/test/Transforms/LoopRotate/pr22337.ll | 6 +- llvm/test/Transforms/LoopRotate/pr33701.ll | 4 +- llvm/test/Transforms/LoopRotate/pr37205.ll | 2 +- .../LoopRotate/preserve-loop-simplify.ll | 8 +-- .../Transforms/LoopRotate/preserve-mssa.ll | 22 +++--- .../2010-07-15-IncorrectDomFrontierUpdate.ll | 4 +- .../2010-12-26-PHIInfiniteLoop.ll | 12 ++-- .../test/Transforms/LoopSimplify/dup-preds.ll | 12 ++-- .../Transforms/LoopSimplify/indirectbr.ll | 6 +- .../Transforms/LoopSimplify/notify-scev.ll | 18 ++--- llvm/test/Transforms/LoopSimplify/pr28272.ll | 24 +++---- llvm/test/Transforms/LoopSimplify/pr30454.ll | 6 +- .../LoopSimplify/unreachable-loop-pred.ll | 4 +- .../LoopSimplifyCFG/constant-fold-branch.ll | 14 ++-- .../LoopSimplifyCFG/update_parents.ll | 8 +-- .../LoopStrengthReduce/2011-10-14-IntPtr.ll | 6 +- .../2011-12-19-PostincQuadratic.ll | 2 +- .../2013-01-14-ReuseCast.ll | 14 ++-- .../LoopStrengthReduce/AArch64/pr47329.ll | 72 +++++++++---------- .../AMDGPU/lsr-invalid-ptr-extend.ll | 6 +- .../AMDGPU/lsr-void-inseltpoison.ll | 4 +- .../LoopStrengthReduce/AMDGPU/lsr-void.ll | 4 +- .../AMDGPU/preserve-addrspace-assert.ll | 2 +- .../ARM/addrec-is-loop-invariant.ll | 4 +- .../Power/incomplete-phi.ll | 4 +- .../X86/2009-11-10-LSRCrash.ll | 32 ++++----- .../X86/2011-07-20-DoubleIV.ll | 4 +- .../X86/2012-01-13-phielim.ll | 34 ++++----- .../X86/no_superflous_induction_vars.ll | 4 +- .../LoopStrengthReduce/X86/pr40514.ll | 2 +- 
.../callbr-critical-edge-splitting.ll | 4 +- .../LoopStrengthReduce/dominate-assert.ll | 18 ++--- .../Transforms/LoopStrengthReduce/funclet.ll | 8 +-- .../hoist-parent-preheader.ll | 6 +- .../Transforms/LoopStrengthReduce/ivchain.ll | 8 +-- .../LoopStrengthReduce/nonintegral.ll | 6 +- .../Transforms/LoopStrengthReduce/pr12048.ll | 8 +-- .../Transforms/LoopStrengthReduce/pr50765.ll | 4 +- .../scaling-factor-incompat-type.ll | 2 +- .../scaling_factor_cost_crash.ll | 30 ++++---- .../scev-after-loopinstsimplify.ll | 4 +- .../LoopStrengthReduce/scev-expander-lcssa.ll | 20 +++--- .../uglygep-address-space.ll | 2 +- .../Transforms/LoopStrengthReduce/uglygep.ll | 2 +- .../LoopUnroll/2011-08-08-PhiUpdate.ll | 22 +++--- .../LoopUnroll/full-unroll-crashers.ll | 28 ++++---- llvm/test/Transforms/LoopUnroll/pr10813.ll | 6 +- llvm/test/Transforms/LoopUnroll/pr14167.ll | 6 +- llvm/test/Transforms/LoopUnroll/pr27157.ll | 14 ++-- llvm/test/Transforms/LoopUnroll/pr28132.ll | 6 +- .../Transforms/LoopUnroll/rebuild_lcssa.ll | 10 +-- .../LoopUnroll/runtime-loop-multiple-exits.ll | 52 +++++++------- llvm/test/Transforms/LoopUnroll/unloop.ll | 22 +++--- .../LoopVectorize/2012-10-20-infloop.ll | 12 ++-- .../X86/consecutive-ptr-cg-bug.ll | 12 ++-- .../Transforms/LoopVectorize/X86/pr39160.ll | 6 +- .../Transforms/LoopVectorize/X86/rauw-bug.ll | 4 +- .../LoopVectorize/X86/reduction-crash.ll | 4 +- .../Transforms/LoopVectorize/if-conv-crash.ll | 10 +-- .../LoopVectorize/incorrect-dom-info.ll | 44 ++++++------ .../Transforms/LoopVectorize/nsw-crash.ll | 4 +- llvm/test/Transforms/LoopVectorize/pr36311.ll | 6 +- .../LoopVectorize/reduction-order.ll | 4 +- .../stale-worklist-phi.ll | 4 +- .../condition-phi-unreachable-default.ll | 2 +- .../do-not-handle-impossible-values.ll | 6 +- .../LowerSwitch/phi-in-dead-block.ll | 8 +-- 77 files changed, 420 insertions(+), 420 deletions(-) diff --git a/llvm/test/Transforms/LoopInterchange/lcssa.ll b/llvm/test/Transforms/LoopInterchange/lcssa.ll index 
b41eba4ef561736..0a5aefd9e49111a 100644 --- a/llvm/test/Transforms/LoopInterchange/lcssa.ll +++ b/llvm/test/Transforms/LoopInterchange/lcssa.ll @@ -180,7 +180,7 @@ for.end16: ; preds = %for.exit ; REMARK: Interchanged ; REMARK-NEXT: lcssa_05 -define void @lcssa_05(ptr %ptr) { +define void @lcssa_05(ptr %ptr, i1 %arg) { entry: br label %outer.header @@ -190,7 +190,7 @@ outer.header: ; preds = %outer.inc, %entry for.body3: ; preds = %bb3, %outer.header %iv.inner = phi i64 [ %iv.inner.next, %bb3 ], [ 1, %outer.header ] - br i1 undef, label %bb2, label %bb3 + br i1 %arg, label %bb2, label %bb3 bb2: ; preds = %for.body3 %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], ptr @A, i64 0, i64 %iv.inner, i64 %iv.outer @@ -225,13 +225,13 @@ for.end16: ; preds = %for.exit ; REMARK: UnsupportedExitPHI ; REMARK-NEXT: lcssa_06 -define void @lcssa_06(ptr %ptr, ptr %ptr1) { +define void @lcssa_06(ptr %ptr, ptr %ptr1, i1 %arg) { entry: br label %outer.header outer.header: ; preds = %outer.inc, %entry %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] - br i1 undef, label %for.body3, label %outer.inc + br i1 %arg, label %for.body3, label %outer.inc for.body3: ; preds = %for.body3, %outer.header %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ] @@ -305,13 +305,13 @@ for.end16: ; preds = %for.exit ; is an lcssa phi node outside the loopnest. 
; REMARK: Interchanged ; REMARK-NEXT: lcssa_08 -define i64 @lcssa_08(ptr %Arr) { +define i64 @lcssa_08(ptr %Arr, i1 %arg) { entry: br label %for1.header for1.header: ; preds = %for1.inc, %entry %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ] - br i1 undef, label %for2, label %for1.inc + br i1 %arg, label %for2, label %for1.inc for2: ; preds = %for2, %for1.header %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ] diff --git a/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll b/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll index cc787fa55600a6b..9d2e393937bd5cb 100644 --- a/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll +++ b/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll @@ -45,7 +45,7 @@ for.cond1.for.end_crit_edge: ; preds = %for.inc for.inc3: ; preds = %for.cond1.for.end_crit_edge %inc4 = add nsw i32 %inc41, 1 - br i1 undef, label %for.body, label %for.cond.for.end5_crit_edge + br i1 false, label %for.body, label %for.cond.for.end5_crit_edge for.cond.for.end5_crit_edge: ; preds = %for.inc3 ret void @@ -86,7 +86,7 @@ for.cond1.for.end_crit_edge: ; preds = %for.inc for.inc3: ; preds = %for.cond1.for.end_crit_edge %inc4 = add nsw i32 %inc41, 1 - br i1 undef, label %for.body, label %for.cond.for.end5_crit_edge + br i1 false, label %for.body, label %for.cond.for.end5_crit_edge for.cond.for.end5_crit_edge: ; preds = %for.inc3 ret void diff --git a/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll b/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll index dfa20642b938588..1bf1c8abba7cbbb 100644 --- a/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll +++ b/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll @@ -6,7 +6,7 @@ ; In the 2 test cases below, we have a LCSSA PHI in the inner loop exit, which ; is used in the outer loop 
latch. This is not supported. -define void @test1() { +define void @test1(i1 %arg) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] @@ -19,12 +19,12 @@ define void @test1() { ; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[IDX]], align 8 ; CHECK-NEXT: store double undef, ptr [[IDX]], align 8 ; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i64 [[INNER_IV]], 1 -; CHECK-NEXT: br i1 false, label [[INNER]], label [[OUTER_LATCH]] +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[INNER]], label [[OUTER_LATCH]] ; CHECK: outer.latch: ; CHECK-NEXT: [[INC43_LCSSA_WIDE_US:%.*]] = phi i64 [ [[INNER_IV_NEXT]], [[INNER]] ] ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INC43_LCSSA_WIDE_US]] to i32 ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1 -; CHECK-NEXT: br i1 false, label [[OUTER_HEADER]], label [[OUTER_EXIT:%.*]] +; CHECK-NEXT: br i1 [[ARG]], label [[OUTER_HEADER]], label [[OUTER_EXIT:%.*]] ; CHECK: outer.exit: ; CHECK-NEXT: ret void ; @@ -41,20 +41,20 @@ inner: ; preds = %for.body28.us, %for.body25. %0 = load double, ptr %idx, align 8 store double undef, ptr %idx, align 8 %inner.iv.next = add nuw nsw i64 %inner.iv, 1 - br i1 undef, label %inner, label %outer.latch + br i1 %arg, label %inner, label %outer.latch outer.latch: ; preds = %inner %inc43.lcssa.wide.us = phi i64 [ %inner.iv.next, %inner ] %1 = trunc i64 %inc43.lcssa.wide.us to i32 %outer.iv.next = add nsw i64 %outer.iv, 1 - br i1 undef, label %outer.header, label %outer.exit + br i1 %arg, label %outer.header, label %outer.exit outer.exit: ; preds = %for.cond26.for.end44_crit_edge.us ret void } ; Same as @test1, but with a dedicated inner loop exit block. 
-define void @test2() { +define void @test2(i1 %arg) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] @@ -67,14 +67,14 @@ define void @test2() { ; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[IDX]], align 8 ; CHECK-NEXT: store double undef, ptr [[IDX]], align 8 ; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i64 [[INNER_IV]], 1 -; CHECK-NEXT: br i1 false, label [[INNER]], label [[INNER_EXIT:%.*]] +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[INNER]], label [[INNER_EXIT:%.*]] ; CHECK: inner.exit: ; CHECK-NEXT: [[INC43_LCSSA_WIDE_US:%.*]] = phi i64 [ [[INNER_IV_NEXT]], [[INNER]] ] ; CHECK-NEXT: br label [[OUTER_LATCH]] ; CHECK: outer.latch: ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INC43_LCSSA_WIDE_US]] to i32 ; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1 -; CHECK-NEXT: br i1 false, label [[OUTER_HEADER]], label [[OUTER_EXIT:%.*]] +; CHECK-NEXT: br i1 [[ARG]], label [[OUTER_HEADER]], label [[OUTER_EXIT:%.*]] ; CHECK: outer.exit: ; CHECK-NEXT: ret void ; @@ -91,7 +91,7 @@ inner: ; preds = %for.body28.us, %for.body25. 
%0 = load double, ptr %idx, align 8 store double undef, ptr %idx, align 8 %inner.iv.next = add nuw nsw i64 %inner.iv, 1 - br i1 undef, label %inner, label %inner.exit + br i1 %arg, label %inner, label %inner.exit inner.exit: %inc43.lcssa.wide.us = phi i64 [ %inner.iv.next, %inner ] @@ -100,7 +100,7 @@ inner.exit: outer.latch: ; preds = %inner %1 = trunc i64 %inc43.lcssa.wide.us to i32 %outer.iv.next = add nsw i64 %outer.iv, 1 - br i1 undef, label %outer.header, label %outer.exit + br i1 %arg, label %outer.header, label %outer.exit outer.exit: ; preds = %for.cond26.for.end44_crit_edge.us ret void diff --git a/llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll b/llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll index 1fe34b3d04620bf..a0d0543075ffc3a 100644 --- a/llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll +++ b/llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll @@ -59,7 +59,7 @@ inner.header: ; preds = %for.inc, %for.bo inner.latch: ; preds = %for.body6 %inner.idx.inc = add nsw i64 %inner.idx, 1 - br i1 undef, label %inner.header, label %inner.exit + br i1 false, label %inner.header, label %inner.exit inner.exit: ; preds = %for.inc %outer.v = add nsw i64 %outer.idx, 1 @@ -67,7 +67,7 @@ inner.exit: ; preds = %for.inc outer.latch: ; preds = %for.end %outer.idx.inc = add nsw i64 %outer.idx, 1 - br i1 undef, label %outer.header, label %outer.exit + br i1 false, label %outer.header, label %outer.exit outer.exit: ; preds = %for.inc27 %exit1.lcssa = phi i64 [ %outer.v, %outer.latch ] @@ -133,7 +133,7 @@ inner.header: ; preds = %for.inc, %for.bo inner.latch: ; preds = %for.body6 %inner.idx.inc = add nsw i64 %inner.idx , 1 - br i1 undef, label %inner.header, label %inner.exit + br i1 false, label %inner.header, label %inner.exit inner.exit: ; preds = %for.inc %outer.v = add nsw i64 %outer.idx, 1 @@ -141,7 +141,7 @@ inner.exit: ; preds 
= %for.inc outer.latch: ; preds = %for.end %outer.idx.inc = add nsw i64 %outer.idx, 1 - br i1 undef, label %outer.header, label %outer.exit + br i1 false, label %outer.header, label %outer.exit outer.exit: ; preds = %for.inc27 %exit1.lcssa = phi i64 [ 0, %entry ], [ %outer.v, %outer.latch ] diff --git a/llvm/test/Transforms/LoopInterchange/pr57148.ll b/llvm/test/Transforms/LoopInterchange/pr57148.ll index f2b8a93a780bdf5..0d4194762a69216 100644 --- a/llvm/test/Transforms/LoopInterchange/pr57148.ll +++ b/llvm/test/Transforms/LoopInterchange/pr57148.ll @@ -152,11 +152,11 @@ vector.body85: ; preds = %vector.body85, %for %1 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %0, i16 %j.165 %2 = load i32, ptr %1, align 1 %index.next87 = add nuw i16 %index86, 4 - br i1 undef, label %middle.block80, label %vector.body85 + br i1 true, label %middle.block80, label %vector.body85 middle.block80: ; preds = %vector.body85 %inc66 = add nuw nsw i16 %j.165, 1 - br i1 undef, label %for.inc68, label %for.cond37.preheader + br i1 true, label %for.inc68, label %for.cond37.preheader for.inc68: ; preds = %middle.block80 %inc69 = add nuw nsw i16 %i.166, 1 diff --git a/llvm/test/Transforms/LoopLoadElim/pr-48150.ll b/llvm/test/Transforms/LoopLoadElim/pr-48150.ll index ee0eaa9b542c87d..6767f877567509f 100644 --- a/llvm/test/Transforms/LoopLoadElim/pr-48150.ll +++ b/llvm/test/Transforms/LoopLoadElim/pr-48150.ll @@ -3,10 +3,10 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" target triple = "x86_64-unknown-linux-gnu" -define void @test() { +define void @test(i1 %arg) { ; CHECK-LABEL: test bb: - br i1 undef, label %bb1, label %bb2 + br i1 %arg, label %bb1, label %bb2 bb1: ; preds = %bb ret void diff --git a/llvm/test/Transforms/LoopLoadElim/pr47457.ll b/llvm/test/Transforms/LoopLoadElim/pr47457.ll index 70b319b563bf477..ddf3ec249fa60a6 100644 --- a/llvm/test/Transforms/LoopLoadElim/pr47457.ll +++ 
b/llvm/test/Transforms/LoopLoadElim/pr47457.ll @@ -5,7 +5,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 target triple = "x86_64-unknown-linux-gnu" ; Make sure it does not crash with assert. -define void @test() { +define void @test(i1 %arg) { ; CHECK-LABEL: test bb: @@ -13,7 +13,7 @@ bb: bb1: ; preds = %bb6, %bb1, %bb %tmp = phi i32 [ undef, %bb ], [ 0, %bb1 ], [ %tmp3, %bb6 ] - br i1 undef, label %bb1, label %bb2 + br i1 %arg, label %bb1, label %bb2 bb2: ; preds = %bb1 %tmp3 = add i32 %tmp, 1 @@ -24,7 +24,7 @@ bb5: ; preds = %bb2 ret void bb6: ; preds = %bb2 - br i1 undef, label %bb7, label %bb1 + br i1 %arg, label %bb7, label %bb1 bb7: ; preds = %bb7, %bb6 %tmp8 = phi i32 [ %tmp15, %bb7 ], [ %tmp3, %bb6 ] diff --git a/llvm/test/Transforms/LoopPredication/predicate-exits.ll b/llvm/test/Transforms/LoopPredication/predicate-exits.ll index 470ae3bdcac02f1..862b917f8a53d63 100644 --- a/llvm/test/Transforms/LoopPredication/predicate-exits.ll +++ b/llvm/test/Transforms/LoopPredication/predicate-exits.ll @@ -1117,7 +1117,7 @@ bb3: ; preds = %bb bb4: ; preds = %bb6, %bb3 %tmp5 = phi i32 [ %tmp7, %bb6 ], [ 0, %bb3 ] - br i1 undef, label %bb10, label %bb6 + br i1 true, label %bb10, label %bb6 bb6: ; preds = %bb4 %tmp7 = add nuw nsw i32 %tmp5, 1 diff --git a/llvm/test/Transforms/LoopRotate/crash.ll b/llvm/test/Transforms/LoopRotate/crash.ll index 8ca5f6c58d515fd..3a4e813aae28d04 100644 --- a/llvm/test/Transforms/LoopRotate/crash.ll +++ b/llvm/test/Transforms/LoopRotate/crash.ll @@ -4,27 +4,27 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-darwin10.0.0" ; PR8955 - Rotating an outer loop that has a condbr for a latch block. 
-define void @test1() nounwind ssp { +define void @test1(i1 %arg) nounwind ssp { entry: br label %lbl_283 lbl_283: ; preds = %if.end, %entry - br i1 undef, label %if.else, label %if.then + br i1 %arg, label %if.else, label %if.then if.then: ; preds = %lbl_283 - br i1 undef, label %if.end, label %for.condthread-pre-split + br i1 %arg, label %if.end, label %for.condthread-pre-split for.condthread-pre-split: ; preds = %if.then br label %for.cond for.cond: ; preds = %for.cond, %for.condthread-pre-split - br i1 undef, label %lbl_281, label %for.cond + br i1 %arg, label %lbl_281, label %for.cond lbl_281: ; preds = %if.end, %for.cond br label %if.end if.end: ; preds = %lbl_281, %if.then - br i1 undef, label %lbl_283, label %lbl_281 + br i1 %arg, label %lbl_283, label %lbl_281 if.else: ; preds = %lbl_283 ret void @@ -140,12 +140,12 @@ bb17: ; preds = %bb15 ; PR9523 - Non-canonical loop. -define void @test7(ptr %P) nounwind { +define void @test7(ptr %P, i1 %arg) nounwind { entry: indirectbr ptr %P, [label %"3", label %"5"] "3": ; preds = %"4", %entry - br i1 undef, label %"5", label %"4" + br i1 %arg, label %"5", label %"4" "4": ; preds = %"3" br label %"3" diff --git a/llvm/test/Transforms/LoopRotate/multiple-exits.ll b/llvm/test/Transforms/LoopRotate/multiple-exits.ll index 5832c7613dd147f..748700c2589ffd5 100644 --- a/llvm/test/Transforms/LoopRotate/multiple-exits.ll +++ b/llvm/test/Transforms/LoopRotate/multiple-exits.ll @@ -199,12 +199,12 @@ declare ptr @__cxa_begin_catch(ptr) declare void @__cxa_end_catch() -define void @test4() nounwind uwtable { +define void @test4(i1 %arg) nounwind uwtable { entry: br label %"7" "3": ; preds = %"7" - br i1 undef, label %"31", label %"4" + br i1 %arg, label %"31", label %"4" "4": ; preds = %"3" %. 
= select i1 undef, float 0x3F50624DE0000000, float undef @@ -217,7 +217,7 @@ entry: br i1 %2, label %"3", label %"8" "8": ; preds = %"7" - br i1 undef, label %"9", label %"31" + br i1 %arg, label %"9", label %"31" "9": ; preds = %"8" br label %"33" @@ -226,7 +226,7 @@ entry: unreachable "31": ; preds = %"8", %"3" - br i1 undef, label %"27", label %"32" + br i1 %arg, label %"27", label %"32" "32": ; preds = %"31" br label %"33" diff --git a/llvm/test/Transforms/LoopRotate/pr22337.ll b/llvm/test/Transforms/LoopRotate/pr22337.ll index 95468e0019bee97..6133b8633859562 100644 --- a/llvm/test/Transforms/LoopRotate/pr22337.ll +++ b/llvm/test/Transforms/LoopRotate/pr22337.ll @@ -3,17 +3,17 @@ @a = external global i8, align 4 @tmp = global ptr @a -define void @f() { +define void @f(i1 %arg) { ; CHECK-LABEL: define void @f( ; CHECK: getelementptr i8, ptr @a, i32 1 entry: br label %for.preheader for.preheader: - br i1 undef, label %if.then8, label %for.body + br i1 %arg, label %if.then8, label %for.body for.body: - br i1 undef, label %if.end, label %if.then8 + br i1 %arg, label %if.end, label %if.then8 if.end: %arrayidx = getelementptr i8, ptr @a, i32 1 diff --git a/llvm/test/Transforms/LoopRotate/pr33701.ll b/llvm/test/Transforms/LoopRotate/pr33701.ll index 6f30c6f4b0e613e..f0421ebc01afe05 100644 --- a/llvm/test/Transforms/LoopRotate/pr33701.ll +++ b/llvm/test/Transforms/LoopRotate/pr33701.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -passes=loop-rotate -verify-dom-info -verify-loop-info -verify-memoryssa -disable-output -define void @func() { +define void @func(i1 %arg) { bb0: br label %bb1 @@ -10,7 +10,7 @@ bb1: ; preds = %bb4, %bb0 br i1 %1, label %bb2, label %bb5 bb2: ; preds = %bb1 - br i1 undef, label %bb6, label %bb4 + br i1 %arg, label %bb6, label %bb4 bb3: ; No predecessors! 
br label %bb6 diff --git a/llvm/test/Transforms/LoopRotate/pr37205.ll b/llvm/test/Transforms/LoopRotate/pr37205.ll index 06977bee0edb0f0..2fa2c20568bbae3 100644 --- a/llvm/test/Transforms/LoopRotate/pr37205.ll +++ b/llvm/test/Transforms/LoopRotate/pr37205.ll @@ -95,7 +95,7 @@ if.then: ; preds = %for.cond br label %for.cond1 for.cond1: ; preds = %for.cond4, %if.then - br i1 undef, label %for.body, label %for.end6 + br i1 false, label %for.body, label %for.end6 for.body: ; preds = %for.cond1 br i1 false, label %if.then3, label %if.end diff --git a/llvm/test/Transforms/LoopRotate/preserve-loop-simplify.ll b/llvm/test/Transforms/LoopRotate/preserve-loop-simplify.ll index 945c6baf2b357f6..744a576a3bc38a8 100644 --- a/llvm/test/Transforms/LoopRotate/preserve-loop-simplify.ll +++ b/llvm/test/Transforms/LoopRotate/preserve-loop-simplify.ll @@ -4,7 +4,7 @@ ; structures. We manually validate the CFG with FileCheck because currently we ; can't cause a failure when LoopSimplify fails to be preserved. -define void @PR18643() { +define void @PR18643(i1 %arg) { ; CHECK-LABEL: @PR18643( entry: br label %outer.header @@ -12,7 +12,7 @@ entry: outer.header: ; CHECK: outer.header: - br i1 undef, label %inner.header, label %outer.body + br i1 %arg, label %inner.header, label %outer.body ; CHECK-NEXT: br i1 {{[^,]*}}, label %[[INNER_PREROTATE_PREHEADER:[^,]*]], label %outer.body ; CHECK: [[INNER_PREROTATE_PREHEADER]]: @@ -24,13 +24,13 @@ outer.header: inner.header: ; Now the latch! ; CHECK: inner.header: - br i1 undef, label %return, label %inner.body + br i1 %arg, label %return, label %inner.body ; CHECK-NEXT: br i1 {{[^,]*}}, label %[[INNER_SPLIT_RETURN:[^,]*]], label %inner.body inner.body: ; Now the header! 
; CHECK: inner.body: - br i1 undef, label %outer.latch, label %inner.latch + br i1 %arg, label %outer.latch, label %inner.latch ; CHECK-NEXT: br i1 {{[^,]*}}, label %[[INNER_SPLIT_OUTER_LATCH:[^,]*]], label %inner.header inner.latch: diff --git a/llvm/test/Transforms/LoopRotate/preserve-mssa.ll b/llvm/test/Transforms/LoopRotate/preserve-mssa.ll index f8d0ed8b44201ad..4135c2966f1fdb9 100644 --- a/llvm/test/Transforms/LoopRotate/preserve-mssa.ll +++ b/llvm/test/Transforms/LoopRotate/preserve-mssa.ll @@ -1,15 +1,15 @@ ; RUN: opt -S -passes=loop-rotate -verify-memoryssa < %s | FileCheck %s ; CHECK-LABEL: @multiedge( -define void @multiedge() { +define void @multiedge(i1 %arg, i32 %arg2) { entry: br label %retry retry: ; preds = %sw.epilog, %entry - br i1 undef, label %cleanup, label %if.end + br i1 %arg, label %cleanup, label %if.end if.end: ; preds = %retry - switch i32 undef, label %sw.epilog [ + switch i32 %arg2, label %sw.epilog [ i32 -3, label %cleanup i32 -5, label %cleanup i32 -16, label %cleanup @@ -24,14 +24,14 @@ cleanup: ; preds = %if.end, %if.end, %i } ; CHECK-LABEL: @read_line( -define internal fastcc i32 @read_line(ptr nocapture %f) unnamed_addr { +define internal fastcc i32 @read_line(ptr nocapture %f, i1 %arg) unnamed_addr { entry: br label %for.cond for.cond: ; preds = %if.end, %entry %call = call ptr @prepbuffer(ptr nonnull undef) %call1 = call ptr @fgets(ptr %call, i32 8192, ptr %f) - br i1 undef, label %if.then, label %if.end + br i1 %arg, label %if.then, label %if.end if.then: ; preds = %for.cond ret i32 undef @@ -47,12 +47,12 @@ declare dso_local i64 @strlen(ptr nocapture) local_unnamed_addr ; CHECK-LABEL: @loop3 -define dso_local fastcc void @loop3() unnamed_addr { +define dso_local fastcc void @loop3(i1 %arg) unnamed_addr { entry: br label %for.cond for.cond: ; preds = %for.body, %entry - br i1 undef, label %for.body, label %for.end81 + br i1 %arg, label %for.body, label %for.end81 for.body: ; preds = %for.cond %.idx122.val = load i32, ptr 
undef, align 8 @@ -64,12 +64,12 @@ for.end81: ; preds = %for.cond } ; CHECK-LABEL: @loop4 -define dso_local fastcc void @loop4() unnamed_addr { +define dso_local fastcc void @loop4(i1 %arg) unnamed_addr { entry: br label %while.cond while.cond: ; preds = %while.body, %entry - br i1 undef, label %while.end, label %while.body + br i1 %arg, label %while.end, label %while.body while.body: ; preds = %while.cond call fastcc void @cont() @@ -87,7 +87,7 @@ declare dso_local fastcc void @cont() unnamed_addr @glob_array = internal unnamed_addr constant [3 x i32] [i32 1, i32 0, i32 2], align 4 ; Test against failure in MemorySSAUpdater, when rotate clones instructions as Value. ; CHECK-LABEL: @loop5 -define dso_local fastcc void @loop5() unnamed_addr { +define dso_local fastcc void @loop5(i1 %arg) unnamed_addr { entry: br label %for.body @@ -98,7 +98,7 @@ for.body: ; preds = %if.end, %entry %indvar = phi i64 [ %indvar.next, %if.end ], [ 0, %entry ] %array = getelementptr inbounds [3 x i32], ptr @glob_array, i64 0, i64 %indvar %0 = load i32, ptr %array, align 4 - br i1 undef, label %do.cond, label %if.end + br i1 %arg, label %do.cond, label %if.end if.end: ; preds = %for.body store i32 undef, ptr undef, align 4 diff --git a/llvm/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll b/llvm/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll index 89bf91782b06147..f4a639dad369e44 100644 --- a/llvm/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll +++ b/llvm/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll @@ -1,9 +1,9 @@ ; RUN: opt < %s -passes='require,loop-simplify,require' -verify-dom-info -define void @a() nounwind { +define void @a(i1 %arg) nounwind { entry: - br i1 undef, label %bb37, label %bb1.i + br i1 %arg, label %bb37, label %bb1.i bb1.i: ; preds = %bb1.i, %bb %indvar = phi i64 [ %indvar.next, %bb1.i ], [ 0, %entry ] ; [#uses=1] diff --git 
a/llvm/test/Transforms/LoopSimplify/2010-12-26-PHIInfiniteLoop.ll b/llvm/test/Transforms/LoopSimplify/2010-12-26-PHIInfiniteLoop.ll index b295baf9b01fd88..7c1a166f6677725 100644 --- a/llvm/test/Transforms/LoopSimplify/2010-12-26-PHIInfiniteLoop.ll +++ b/llvm/test/Transforms/LoopSimplify/2010-12-26-PHIInfiniteLoop.ll @@ -5,19 +5,19 @@ target triple = "x86_64-unknown-freebsd9.0" declare void @foo(i32 %x) -define fastcc void @inm_merge() nounwind { +define fastcc void @inm_merge(i1 %arg) nounwind { entry: br label %for.cond for.cond: ; preds = %while.cond36.i, %entry - br i1 undef, label %do.body, label %for.body + br i1 %arg, label %do.body, label %for.body for.body: ; preds = %for.cond - br i1 undef, label %while.cond36.i, label %if.end44 + br i1 %arg, label %while.cond36.i, label %if.end44 if.end44: ; preds = %for.body %call49 = call fastcc i32 @inm_get_source() - br i1 undef, label %if.end54, label %for.cond64 + br i1 %arg, label %if.end54, label %for.cond64 if.end54: ; preds = %if.end44 br label %while.cond36.i @@ -28,10 +28,10 @@ while.cond36.i: ; preds = %if.end54, %for.body for.cond64: ; preds = %if.end88, %for.cond64, %if.end44 %error.161 = phi i32 [ %error.161, %for.cond64 ], [ %error.161, %if.end88 ], [ %call49, %if.end44 ] call void @foo(i32 %error.161) - br i1 undef, label %for.cond64, label %if.end88 + br i1 %arg, label %for.cond64, label %if.end88 if.end88: ; preds = %for.cond64 - br i1 undef, label %for.cond64, label %if.end98 + br i1 %arg, label %for.cond64, label %if.end98 if.end98: ; preds = %if.end88 unreachable diff --git a/llvm/test/Transforms/LoopSimplify/dup-preds.ll b/llvm/test/Transforms/LoopSimplify/dup-preds.ll index d21cb3a02fe4396..c4fa0a596750192 100644 --- a/llvm/test/Transforms/LoopSimplify/dup-preds.ll +++ b/llvm/test/Transforms/LoopSimplify/dup-preds.ll @@ -2,21 +2,21 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = 
"powerpc64le-unknown-linux" -define fastcc void @do_update_md(ptr nocapture readonly %x) #0 { +define fastcc void @do_update_md(ptr nocapture readonly %x, i1 %arg, i16 %arg2) #0 { entry: - br i1 undef, label %if.end365, label %lor.lhs.false134 + br i1 %arg, label %if.end365, label %lor.lhs.false134 lor.lhs.false134: ; preds = %entry - br i1 undef, label %lor.lhs.false138, label %if.end365 + br i1 %arg, label %lor.lhs.false138, label %if.end365 lor.lhs.false138: ; preds = %lor.lhs.false134 - br i1 undef, label %lor.lhs.false142, label %if.end365 + br i1 %arg, label %lor.lhs.false142, label %if.end365 lor.lhs.false142: ; preds = %lor.lhs.false138 - br i1 undef, label %for.body276.lr.ph, label %if.end365 + br i1 %arg, label %for.body276.lr.ph, label %if.end365 for.body276.lr.ph: ; preds = %lor.lhs.false142 - switch i16 undef, label %if.then288 [ + switch i16 %arg2, label %if.then288 [ i16 4, label %for.body344 i16 2, label %for.body344 ] diff --git a/llvm/test/Transforms/LoopSimplify/indirectbr.ll b/llvm/test/Transforms/LoopSimplify/indirectbr.ll index 6454bfbb5b05998..8da1f2816ff097b 100644 --- a/llvm/test/Transforms/LoopSimplify/indirectbr.ll +++ b/llvm/test/Transforms/LoopSimplify/indirectbr.ll @@ -82,15 +82,15 @@ L1: ret i64 %y } -define void @pr5502() nounwind { +define void @pr5502(ptr %arg, i1 %arg2) nounwind { entry: br label %while.cond while.cond: - br i1 undef, label %while.body, label %while.end + br i1 %arg2, label %while.body, label %while.end while.body: - indirectbr ptr undef, [label %end_opcode, label %end_opcode] + indirectbr ptr %arg, [label %end_opcode, label %end_opcode] end_opcode: br i1 false, label %end_opcode, label %while.cond diff --git a/llvm/test/Transforms/LoopSimplify/notify-scev.ll b/llvm/test/Transforms/LoopSimplify/notify-scev.ll index a3482d133920ea5..ff8b3870550d3a8 100644 --- a/llvm/test/Transforms/LoopSimplify/notify-scev.ll +++ b/llvm/test/Transforms/LoopSimplify/notify-scev.ll @@ -17,12 +17,12 @@ target triple = 
"x86_64-apple-darwin" ; CHECK-LABEL: for.cond127.preheader: ; CHECK-NOT: for.cond127: ; CHECK-LABEL: for.body129: -define void @t() { +define void @t(i1 %arg) { entry: br label %for.body102 for.body102: - br i1 undef, label %for.cond127.preheader, label %for.inc203 + br i1 %arg, label %for.cond127.preheader, label %for.inc203 for.cond127.preheader: br label %for.body129 @@ -34,10 +34,10 @@ for.cond127: for.body129: %uv.013 = phi i32 [ 0, %for.cond127.preheader ], [ %inc191, %for.cond127 ] %idxprom130 = sext i32 %uv.013 to i64 - br i1 undef, label %for.cond135.preheader.lr.ph, label %for.end185 + br i1 %arg, label %for.cond135.preheader.lr.ph, label %for.end185 for.cond135.preheader.lr.ph: - br i1 undef, label %for.cond135.preheader.lr.ph.split.us, label %for.cond135.preheader.lr.ph.split_crit_edge + br i1 %arg, label %for.cond135.preheader.lr.ph.split.us, label %for.cond135.preheader.lr.ph.split_crit_edge for.cond135.preheader.lr.ph.split_crit_edge: br label %for.cond135.preheader.lr.ph.split @@ -51,17 +51,17 @@ for.cond135.preheader.us: for.end178.us: %add184.us = add nsw i32 %block_y.09.us, 4 - br i1 undef, label %for.end185split.us-lcssa.us, label %for.cond132.us + br i1 %arg, label %for.end185split.us-lcssa.us, label %for.cond132.us for.end174.us: - br i1 undef, label %for.cond138.preheader.us, label %for.cond135.for.end178_crit_edge.us + br i1 %arg, label %for.cond138.preheader.us, label %for.cond135.for.end178_crit_edge.us for.inc172.us: - br i1 undef, label %for.cond142.preheader.us, label %for.end174.us + br i1 %arg, label %for.cond142.preheader.us, label %for.end174.us for.body145.us: %arrayidx163.us = getelementptr inbounds %struct.Params, ptr undef, i64 0, i32 0, i64 %idxprom130, i64 %idxprom146.us - br i1 undef, label %for.body145.us, label %for.inc172.us + br i1 %arg, label %for.body145.us, label %for.inc172.us for.cond142.preheader.us: %j.04.us = phi i32 [ %block_y.09.us, %for.cond138.preheader.us ], [ undef, %for.inc172.us ] @@ -72,7 +72,7 @@ 
for.cond138.preheader.us: br label %for.cond142.preheader.us for.cond132.us: - br i1 undef, label %for.cond135.preheader.us, label %for.cond132.for.end185_crit_edge.us-lcssa.us + br i1 %arg, label %for.cond135.preheader.us, label %for.cond132.for.end185_crit_edge.us-lcssa.us for.cond138.preheader.lr.ph.us: br label %for.cond138.preheader.us diff --git a/llvm/test/Transforms/LoopSimplify/pr28272.ll b/llvm/test/Transforms/LoopSimplify/pr28272.ll index 3650ff18c6a3685..cd9de1d472981db 100644 --- a/llvm/test/Transforms/LoopSimplify/pr28272.ll +++ b/llvm/test/Transforms/LoopSimplify/pr28272.ll @@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu" ; after loop-simplify, we crash on assertion. ; CHECK-LABEL: @foo -define void @foo() { +define void @foo(i1 %arg) { entry: br label %header @@ -37,7 +37,7 @@ bb54: } ; CHECK-LABEL: @foo2 -define void @foo2() { +define void @foo2(i1 %arg) { entry: br label %outer @@ -66,7 +66,7 @@ loop2.if: i32 1, label %bb] loop2.if.true: - br i1 undef, label %loop2, label %bb + br i1 %arg, label %loop2, label %bb loop2.if.false: br label %loop2 @@ -78,29 +78,29 @@ bb: ; When LoopSimplify separates nested loops, it might break LCSSA form: values ; from the original loop might be used in exit blocks of the outer loop. 
; CHECK-LABEL: @foo3 -define void @foo3() { +define void @foo3(i1 %arg) { entry: br label %bb1 bb1: - br i1 undef, label %bb2, label %bb1 + br i1 %arg, label %bb2, label %bb1 bb2: %a = phi i32 [ undef, %bb1 ], [ %a, %bb3 ], [ undef, %bb5 ] - br i1 undef, label %bb3, label %bb1 + br i1 %arg, label %bb3, label %bb1 bb3: %b = load ptr, ptr undef - br i1 undef, label %bb2, label %bb4 + br i1 %arg, label %bb2, label %bb4 bb4: - br i1 undef, label %bb5, label %bb6 + br i1 %arg, label %bb5, label %bb6 bb5: - br i1 undef, label %bb2, label %bb4 + br i1 %arg, label %bb2, label %bb4 bb6: - br i1 undef, label %bb_end, label %bb1 + br i1 %arg, label %bb_end, label %bb1 bb_end: %x = getelementptr i32, ptr %b @@ -112,7 +112,7 @@ bb_end: ; original loop (before separating it was a subloop of the original loop, and ; thus didn't require an lcssa phi nodes). ; CHECK-LABEL: @foo4 -define void @foo4() { +define void @foo4(i1 %arg) { bb1: br label %bb2 @@ -126,7 +126,7 @@ bb2.loopexit: ; preds = %bb3 bb2: ; preds = %bb2.loopexit, %bb2, %bb1 %i = phi i32 [ 0, %bb1 ], [ %i, %bb2 ], [ %i.ph, %bb2.loopexit ] %x = load i32, ptr undef, align 8 - br i1 undef, label %bb2, label %bb3.preheader + br i1 %arg, label %bb2, label %bb3.preheader ; CHECK: bb3.preheader: bb3.preheader: ; preds = %bb2 diff --git a/llvm/test/Transforms/LoopSimplify/pr30454.ll b/llvm/test/Transforms/LoopSimplify/pr30454.ll index d32ecfd69cda6c3..dd1e690d469b763 100644 --- a/llvm/test/Transforms/LoopSimplify/pr30454.ll +++ b/llvm/test/Transforms/LoopSimplify/pr30454.ll @@ -7,18 +7,18 @@ declare i8 @bar() ; Test that we preserve LCSSA form when removing edges from unreachable blocks. 
; CHECK-LABEL: @foo -define void @foo() { +define void @foo(i1 %arg) { entry: br label %for.cond for.cond: %x = phi i8 [ undef, %entry ], [ %y, %for.latch ] - br i1 undef, label %for.latch, label %exit + br i1 %arg, label %for.latch, label %exit ; CHECK: unreachable.bb: ; CHECK-NEXT: unreachable unreachable.bb: - br i1 undef, label %exit, label %for.latch + br i1 %arg, label %exit, label %for.latch for.latch: %y = call i8 @bar() diff --git a/llvm/test/Transforms/LoopSimplify/unreachable-loop-pred.ll b/llvm/test/Transforms/LoopSimplify/unreachable-loop-pred.ll index e97c7c29bf57e16..5234325b83c1e4b 100644 --- a/llvm/test/Transforms/LoopSimplify/unreachable-loop-pred.ll +++ b/llvm/test/Transforms/LoopSimplify/unreachable-loop-pred.ll @@ -5,7 +5,7 @@ ; block to the enclosing loop and not get confused by the unreachable ; bogus loop entry. -define void @is_extract_cab() nounwind { +define void @is_extract_cab(i1 %arg) nounwind { entry: br label %header @@ -13,7 +13,7 @@ header: ; preds = %if.end206, %cond.end66, br label %while.body115 while.body115: ; preds = %9, %if.end192, %if.end101 - br i1 undef, label %header, label %while.body115 + br i1 %arg, label %header, label %while.body115 foo: br label %while.body115 diff --git a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll index 021af243b4dd624..1ec212f0bb5ea5b 100644 --- a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll +++ b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll @@ -2580,12 +2580,12 @@ exit: ret i32 %result } -define void @test_crash_01() { +define void @test_crash_01(i1 %arg, i32 %arg2) { ; CHECK-LABEL: @test_crash_01( ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: br i1 undef, label [[BB17:%.*]], label [[BB2:%.*]] +; CHECK-NEXT: br i1 %arg, label [[BB17:%.*]], label [[BB2:%.*]] ; CHECK: bb2: ; CHECK-NEXT: switch i32 0, label [[BB2_SPLIT:%.*]] [ ; CHECK-NEXT: i32 1, 
label [[BB19:%.*]] @@ -2593,7 +2593,7 @@ define void @test_crash_01() { ; CHECK: bb2.split: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: -; CHECK-NEXT: switch i32 undef, label [[BB16:%.*]] [ +; CHECK-NEXT: switch i32 %arg2, label [[BB16:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB15:%.*]] ; CHECK-NEXT: i32 1, label [[BB14:%.*]] ; CHECK-NEXT: i32 2, label [[BB13:%.*]] @@ -2607,7 +2607,7 @@ define void @test_crash_01() { ; CHECK: bb7: ; CHECK-NEXT: unreachable ; CHECK: bb8: -; CHECK-NEXT: switch i32 undef, label [[BB28:%.*]] [ +; CHECK-NEXT: switch i32 %arg2, label [[BB28:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB27:%.*]] ; CHECK-NEXT: i32 1, label [[BB26:%.*]] ; CHECK-NEXT: i32 2, label [[BB23:%.*]] @@ -2663,7 +2663,7 @@ bb: br label %bb1 bb1: ; preds = %bb - br i1 undef, label %bb17, label %bb2 + br i1 %arg, label %bb17, label %bb2 bb2: ; preds = %bb1 br label %bb3 @@ -2678,7 +2678,7 @@ bb4: ; preds = %bb3 ] bb5: ; preds = %bb4 - switch i32 undef, label %bb16 [ + switch i32 %arg2, label %bb16 [ i32 0, label %bb15 i32 1, label %bb14 i32 2, label %bb13 @@ -2697,7 +2697,7 @@ bb7: ; preds = %bb5 unreachable bb8: ; preds = %bb11, %bb5 - switch i32 undef, label %bb28 [ + switch i32 %arg2, label %bb28 [ i32 0, label %bb27 i32 1, label %bb26 i32 2, label %bb23 diff --git a/llvm/test/Transforms/LoopSimplifyCFG/update_parents.ll b/llvm/test/Transforms/LoopSimplifyCFG/update_parents.ll index 7acf76306d6b61a..d390f19425f6c9e 100644 --- a/llvm/test/Transforms/LoopSimplifyCFG/update_parents.ll +++ b/llvm/test/Transforms/LoopSimplifyCFG/update_parents.ll @@ -37,10 +37,10 @@ bb3: ; preds = %bb8, %bb3, %bb2 br i1 false, label %bb4, label %bb3 bb4: ; preds = %bb8, %bb3 - br i1 undef, label %bb1, label %bb6 + br i1 true, label %bb1, label %bb6 bb6: ; preds = %bb4 - br i1 undef, label %bb2, label %bb8 + br i1 false, label %bb2, label %bb8 bb8: ; preds = %bb6 br i1 true, label %bb4, label %bb3 @@ -78,7 +78,7 @@ bb3: br i1 false, label %bb4, label %bb3 bb4: - br i1 undef, label %bb1, label 
%subloop1 + br i1 %c, label %bb1, label %subloop1 subloop1: br i1 %c, label %subloop2, label %subloop11 @@ -111,7 +111,7 @@ subloop2_latch: br label %subloop2 bb6: - br i1 undef, label %bb2, label %bb8 + br i1 %c, label %bb2, label %bb8 bb8: br i1 true, label %bb4, label %bb3 diff --git a/llvm/test/Transforms/LoopStrengthReduce/2011-10-14-IntPtr.ll b/llvm/test/Transforms/LoopStrengthReduce/2011-10-14-IntPtr.ll index b96067370fa124e..770f723fd0ebfc4 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/2011-10-14-IntPtr.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/2011-10-14-IntPtr.ll @@ -8,16 +8,16 @@ target triple = "x86_64-apple-darwin" ; CHECK-LABEL: @test( ; CHECK: phi ; CHECK-NOT: phi -define void @test(i32 %rowStride) ssp align 2 { +define void @test(i32 %rowStride, i1 %arg) ssp align 2 { entry: - %cond = select i1 undef, i32 %rowStride, i32 4 + %cond = select i1 %arg, i32 %rowStride, i32 4 br label %for.end for.end.critedge: ; preds = %for.end br label %for.end for.end: ; preds = %for.end.critedge, %entry - br i1 undef, label %for.body83, label %for.end.critedge + br i1 %arg, label %for.body83, label %for.end.critedge for.body83: ; preds = %for.body83, %for.end %ptr.0157 = phi ptr [ %add.ptr96, %for.body83 ], [ null, %for.end ] diff --git a/llvm/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll b/llvm/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll index 621a4e8797d5779..4241bff1a5e6134 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll @@ -39,7 +39,7 @@ for.body7: %bf.072 = phi i32 [ %t1, %for.body7 ], [ 0, %for.cond.preheader ] %t1 = add i32 %bf.072, %indvars.iv77 %indvars.iv.next78 = add i32 %indvars.iv77, 1 - br i1 undef, label %for.body43, label %for.body7 + br i1 true, label %for.body43, label %for.body7 for.body43: %bf.459 = phi i32 [ %inc44, %for.body43 ], [ %t1, %for.body7 ] diff --git 
a/llvm/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/llvm/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll index 3ad588dbc87d884..db40bba62ebcfa7 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll @@ -15,23 +15,23 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NOT: = ptrtoint ptr undef to i64 ; CHECK: .lr.ph ; CHECK: ret void -define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 { +define void @VerifyDiagnosticConsumerTest(i1 %arg) unnamed_addr nounwind uwtable align 2 { bb: %tmp3 = call ptr @getCharData() nounwind %tmp4 = call ptr @getCharData() nounwind %tmp5 = ptrtoint ptr %tmp4 to i64 %tmp6 = ptrtoint ptr %tmp3 to i64 %tmp7 = sub i64 %tmp5, %tmp6 - br i1 undef, label %bb87, label %.preheader + br i1 false, label %bb87, label %.preheader .preheader: ; preds = %bb10, %bb - br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42.thread, label %bb10 + br i1 false, label %_ZNK4llvm9StringRef4findEcm.exit42.thread, label %bb10 bb10: ; preds = %.preheader - br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42, label %.preheader + br i1 true, label %_ZNK4llvm9StringRef4findEcm.exit42, label %.preheader _ZNK4llvm9StringRef4findEcm.exit42: ; preds = %bb10 - br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42.thread, label %.lr.ph + br i1 false, label %_ZNK4llvm9StringRef4findEcm.exit42.thread, label %.lr.ph _ZNK4llvm9StringRef4findEcm.exit42.thread: ; preds = %_ZNK4llvm9StringRef4findEcm.exit42, %.preheader unreachable @@ -48,7 +48,7 @@ _ZNK4llvm9StringRef4findEcm.exit._crit_edge: ; preds = %bb61, %_ZNK4llvm9St bb36: ; preds = %_ZNK4llvm9StringRef4findEcm.exit.loopexit, %.lr.ph %loc.063 = phi i64 [ undef, %.lr.ph ], [ %i.0.i, %_ZNK4llvm9StringRef4findEcm.exit.loopexit ] - switch i8 undef, label %bb57 [ + switch i8 10, label %bb57 [ i8 10, label %bb48 i8 13, label %bb48 ] @@ 
-73,7 +73,7 @@ bb63: ; preds = %bb61 %tmp64 = getelementptr inbounds i8, ptr %tmp3, i64 %i.0.i %tmp65 = load i8, ptr %tmp64, align 1 %tmp67 = add i64 %i.0.i, 1 - br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit.loopexit, label %bb61 + br i1 %arg, label %_ZNK4llvm9StringRef4findEcm.exit.loopexit, label %bb61 bb87: ; preds = %bb ret void diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll index 9c1f91f8b3ed082..45cbdf9265306cb 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll @@ -4,7 +4,7 @@ target triple = "aarch64-unknown-linux-gnu" @d = internal unnamed_addr global ptr null, align 8 -define dso_local i32 @main() local_unnamed_addr { +define dso_local i32 @main(i1 %arg) local_unnamed_addr { entry: %.pre.pre = load ptr, ptr @d, align 8 br label %for.body9 @@ -12,7 +12,7 @@ entry: for.body9: ; preds = %for.body9, %entry %i = phi ptr [ %.pre.pre, %entry ], [ %incdec.ptr, %for.body9 ] %incdec.ptr = getelementptr inbounds ptr, ptr %i, i64 -1 - br i1 undef, label %for.body9, label %for.inc + br i1 %arg, label %for.body9, label %for.inc for.inc: ; preds = %for.body9 br label %for.body9.118 @@ -20,7 +20,7 @@ for.inc: ; preds = %for.body9 for.body9.1: ; preds = %for.inc.547, %for.body9.1 %i1 = phi ptr [ %incdec.ptr.1, %for.body9.1 ], [ %incdec.ptr.542, %for.inc.547 ] %incdec.ptr.1 = getelementptr inbounds ptr, ptr %i1, i64 -1 - br i1 undef, label %for.body9.1, label %for.inc.1 + br i1 %arg, label %for.body9.1, label %for.inc.1 for.inc.1: ; preds = %for.body9.1 br label %for.body9.1.1 @@ -28,7 +28,7 @@ for.inc.1: ; preds = %for.body9.1 for.body9.2: ; preds = %for.inc.1.5, %for.body9.2 %i2 = phi ptr [ %incdec.ptr.2, %for.body9.2 ], [ %incdec.ptr.1.5, %for.inc.1.5 ] %incdec.ptr.2 = getelementptr inbounds ptr, ptr %i2, i64 -1 - br i1 undef, label %for.body9.2, label %for.inc.2 + br i1 %arg, label 
%for.body9.2, label %for.inc.2 for.inc.2: ; preds = %for.body9.2 br label %for.body9.2.1 @@ -36,7 +36,7 @@ for.inc.2: ; preds = %for.body9.2 for.body9.3: ; preds = %for.inc.2.5, %for.body9.3 %i3 = phi ptr [ %incdec.ptr.3, %for.body9.3 ], [ %incdec.ptr.2.5, %for.inc.2.5 ] %incdec.ptr.3 = getelementptr inbounds ptr, ptr %i3, i64 -1 - br i1 undef, label %for.body9.3, label %for.inc.3 + br i1 %arg, label %for.body9.3, label %for.inc.3 for.inc.3: ; preds = %for.body9.3 br label %for.body9.3.1 @@ -44,7 +44,7 @@ for.inc.3: ; preds = %for.body9.3 for.body9.4: ; preds = %for.inc.3.5, %for.body9.4 %i4 = phi ptr [ %incdec.ptr.4, %for.body9.4 ], [ %incdec.ptr.3.5, %for.inc.3.5 ] %incdec.ptr.4 = getelementptr inbounds ptr, ptr %i4, i64 -1 - br i1 undef, label %for.body9.4, label %for.inc.4 + br i1 %arg, label %for.body9.4, label %for.inc.4 for.inc.4: ; preds = %for.body9.4 br label %for.body9.4.1 @@ -52,7 +52,7 @@ for.inc.4: ; preds = %for.body9.4 for.body9.5: ; preds = %for.inc.4.5, %for.body9.5 %i5 = phi ptr [ %incdec.ptr.5, %for.body9.5 ], [ %incdec.ptr.4.5, %for.inc.4.5 ] %incdec.ptr.5 = getelementptr inbounds ptr, ptr %i5, i64 -1 - br i1 undef, label %for.body9.5, label %for.inc.5 + br i1 %arg, label %for.body9.5, label %for.inc.5 for.inc.5: ; preds = %for.body9.5 br label %for.body9.5.1 @@ -60,7 +60,7 @@ for.inc.5: ; preds = %for.body9.5 for.body9.5.1: ; preds = %for.body9.5.1, %for.inc.5 %i6 = phi ptr [ %incdec.ptr.5.1, %for.body9.5.1 ], [ %incdec.ptr.5, %for.inc.5 ] %incdec.ptr.5.1 = getelementptr inbounds ptr, ptr %i6, i64 -1 - br i1 undef, label %for.body9.5.1, label %for.inc.5.1 + br i1 %arg, label %for.body9.5.1, label %for.inc.5.1 for.inc.5.1: ; preds = %for.body9.5.1 br label %for.body9.5.2 @@ -68,7 +68,7 @@ for.inc.5.1: ; preds = %for.body9.5.1 for.body9.5.2: ; preds = %for.body9.5.2, %for.inc.5.1 %i7 = phi ptr [ %incdec.ptr.5.2, %for.body9.5.2 ], [ %incdec.ptr.5.1, %for.inc.5.1 ] %incdec.ptr.5.2 = getelementptr inbounds ptr, ptr %i7, i64 -1 - br i1 undef, label 
%for.body9.5.2, label %for.inc.5.2 + br i1 %arg, label %for.body9.5.2, label %for.inc.5.2 for.inc.5.2: ; preds = %for.body9.5.2 br label %for.body9.5.3 @@ -76,7 +76,7 @@ for.inc.5.2: ; preds = %for.body9.5.2 for.body9.5.3: ; preds = %for.body9.5.3, %for.inc.5.2 %i8 = phi ptr [ %incdec.ptr.5.3, %for.body9.5.3 ], [ %incdec.ptr.5.2, %for.inc.5.2 ] %incdec.ptr.5.3 = getelementptr inbounds ptr, ptr %i8, i64 -1 - br i1 undef, label %for.body9.5.3, label %for.inc.5.3 + br i1 %arg, label %for.body9.5.3, label %for.inc.5.3 for.inc.5.3: ; preds = %for.body9.5.3 br label %for.body9.5.4 @@ -84,7 +84,7 @@ for.inc.5.3: ; preds = %for.body9.5.3 for.body9.5.4: ; preds = %for.body9.5.4, %for.inc.5.3 %i9 = phi ptr [ %incdec.ptr.5.4, %for.body9.5.4 ], [ %incdec.ptr.5.3, %for.inc.5.3 ] %incdec.ptr.5.4 = getelementptr inbounds ptr, ptr %i9, i64 -1 - br i1 undef, label %for.body9.5.4, label %for.inc.5.4 + br i1 %arg, label %for.body9.5.4, label %for.inc.5.4 for.inc.5.4: ; preds = %for.body9.5.4 br label %for.body9.5.5 @@ -97,7 +97,7 @@ for.body9.5.5: ; preds = %for.body9.5.5, %for for.body9.4.1: ; preds = %for.body9.4.1, %for.inc.4 %i13 = phi ptr [ %incdec.ptr.4.1, %for.body9.4.1 ], [ %incdec.ptr.4, %for.inc.4 ] %incdec.ptr.4.1 = getelementptr inbounds ptr, ptr %i13, i64 -1 - br i1 undef, label %for.body9.4.1, label %for.inc.4.1 + br i1 %arg, label %for.body9.4.1, label %for.inc.4.1 for.inc.4.1: ; preds = %for.body9.4.1 br label %for.body9.4.2 @@ -105,7 +105,7 @@ for.inc.4.1: ; preds = %for.body9.4.1 for.body9.4.2: ; preds = %for.body9.4.2, %for.inc.4.1 %i14 = phi ptr [ %incdec.ptr.4.2, %for.body9.4.2 ], [ %incdec.ptr.4.1, %for.inc.4.1 ] %incdec.ptr.4.2 = getelementptr inbounds ptr, ptr %i14, i64 -1 - br i1 undef, label %for.body9.4.2, label %for.inc.4.2 + br i1 %arg, label %for.body9.4.2, label %for.inc.4.2 for.inc.4.2: ; preds = %for.body9.4.2 br label %for.body9.4.3 @@ -113,7 +113,7 @@ for.inc.4.2: ; preds = %for.body9.4.2 for.body9.4.3: ; preds = %for.body9.4.3, %for.inc.4.2 %i15 = 
phi ptr [ %incdec.ptr.4.3, %for.body9.4.3 ], [ %incdec.ptr.4.2, %for.inc.4.2 ] %incdec.ptr.4.3 = getelementptr inbounds ptr, ptr %i15, i64 -1 - br i1 undef, label %for.body9.4.3, label %for.inc.4.3 + br i1 %arg, label %for.body9.4.3, label %for.inc.4.3 for.inc.4.3: ; preds = %for.body9.4.3 br label %for.body9.4.4 @@ -121,7 +121,7 @@ for.inc.4.3: ; preds = %for.body9.4.3 for.body9.4.4: ; preds = %for.body9.4.4, %for.inc.4.3 %i16 = phi ptr [ %incdec.ptr.4.4, %for.body9.4.4 ], [ %incdec.ptr.4.3, %for.inc.4.3 ] %incdec.ptr.4.4 = getelementptr inbounds ptr, ptr %i16, i64 -1 - br i1 undef, label %for.body9.4.4, label %for.inc.4.4 + br i1 %arg, label %for.body9.4.4, label %for.inc.4.4 for.inc.4.4: ; preds = %for.body9.4.4 br label %for.body9.4.5 @@ -129,7 +129,7 @@ for.inc.4.4: ; preds = %for.body9.4.4 for.body9.4.5: ; preds = %for.body9.4.5, %for.inc.4.4 %i17 = phi ptr [ %incdec.ptr.4.5, %for.body9.4.5 ], [ %incdec.ptr.4.4, %for.inc.4.4 ] %incdec.ptr.4.5 = getelementptr inbounds ptr, ptr %i17, i64 -1 - br i1 undef, label %for.body9.4.5, label %for.inc.4.5 + br i1 %arg, label %for.body9.4.5, label %for.inc.4.5 for.inc.4.5: ; preds = %for.body9.4.5 br label %for.body9.5 @@ -137,7 +137,7 @@ for.inc.4.5: ; preds = %for.body9.4.5 for.body9.3.1: ; preds = %for.body9.3.1, %for.inc.3 %i18 = phi ptr [ %incdec.ptr.3.1, %for.body9.3.1 ], [ %incdec.ptr.3, %for.inc.3 ] %incdec.ptr.3.1 = getelementptr inbounds ptr, ptr %i18, i64 -1 - br i1 undef, label %for.body9.3.1, label %for.inc.3.1 + br i1 %arg, label %for.body9.3.1, label %for.inc.3.1 for.inc.3.1: ; preds = %for.body9.3.1 br label %for.body9.3.2 @@ -145,7 +145,7 @@ for.inc.3.1: ; preds = %for.body9.3.1 for.body9.3.2: ; preds = %for.body9.3.2, %for.inc.3.1 %i19 = phi ptr [ %incdec.ptr.3.2, %for.body9.3.2 ], [ %incdec.ptr.3.1, %for.inc.3.1 ] %incdec.ptr.3.2 = getelementptr inbounds ptr, ptr %i19, i64 -1 - br i1 undef, label %for.body9.3.2, label %for.inc.3.2 + br i1 %arg, label %for.body9.3.2, label %for.inc.3.2 for.inc.3.2: ; 
preds = %for.body9.3.2 br label %for.body9.3.3 @@ -153,7 +153,7 @@ for.inc.3.2: ; preds = %for.body9.3.2 for.body9.3.3: ; preds = %for.body9.3.3, %for.inc.3.2 %i20 = phi ptr [ %incdec.ptr.3.3, %for.body9.3.3 ], [ %incdec.ptr.3.2, %for.inc.3.2 ] %incdec.ptr.3.3 = getelementptr inbounds ptr, ptr %i20, i64 -1 - br i1 undef, label %for.body9.3.3, label %for.inc.3.3 + br i1 %arg, label %for.body9.3.3, label %for.inc.3.3 for.inc.3.3: ; preds = %for.body9.3.3 br label %for.body9.3.4 @@ -161,7 +161,7 @@ for.inc.3.3: ; preds = %for.body9.3.3 for.body9.3.4: ; preds = %for.body9.3.4, %for.inc.3.3 %i21 = phi ptr [ %incdec.ptr.3.4, %for.body9.3.4 ], [ %incdec.ptr.3.3, %for.inc.3.3 ] %incdec.ptr.3.4 = getelementptr inbounds ptr, ptr %i21, i64 -1 - br i1 undef, label %for.body9.3.4, label %for.inc.3.4 + br i1 %arg, label %for.body9.3.4, label %for.inc.3.4 for.inc.3.4: ; preds = %for.body9.3.4 br label %for.body9.3.5 @@ -169,7 +169,7 @@ for.inc.3.4: ; preds = %for.body9.3.4 for.body9.3.5: ; preds = %for.body9.3.5, %for.inc.3.4 %i22 = phi ptr [ %incdec.ptr.3.5, %for.body9.3.5 ], [ %incdec.ptr.3.4, %for.inc.3.4 ] %incdec.ptr.3.5 = getelementptr inbounds ptr, ptr %i22, i64 -1 - br i1 undef, label %for.body9.3.5, label %for.inc.3.5 + br i1 %arg, label %for.body9.3.5, label %for.inc.3.5 for.inc.3.5: ; preds = %for.body9.3.5 br label %for.body9.4 @@ -177,7 +177,7 @@ for.inc.3.5: ; preds = %for.body9.3.5 for.body9.2.1: ; preds = %for.body9.2.1, %for.inc.2 %i23 = phi ptr [ %incdec.ptr.2.1, %for.body9.2.1 ], [ %incdec.ptr.2, %for.inc.2 ] %incdec.ptr.2.1 = getelementptr inbounds ptr, ptr %i23, i64 -1 - br i1 undef, label %for.body9.2.1, label %for.inc.2.1 + br i1 %arg, label %for.body9.2.1, label %for.inc.2.1 for.inc.2.1: ; preds = %for.body9.2.1 br label %for.body9.2.2 @@ -185,7 +185,7 @@ for.inc.2.1: ; preds = %for.body9.2.1 for.body9.2.2: ; preds = %for.body9.2.2, %for.inc.2.1 %i24 = phi ptr [ %incdec.ptr.2.2, %for.body9.2.2 ], [ %incdec.ptr.2.1, %for.inc.2.1 ] %incdec.ptr.2.2 = 
getelementptr inbounds ptr, ptr %i24, i64 -1 - br i1 undef, label %for.body9.2.2, label %for.inc.2.2 + br i1 %arg, label %for.body9.2.2, label %for.inc.2.2 for.inc.2.2: ; preds = %for.body9.2.2 br label %for.body9.2.3 @@ -193,7 +193,7 @@ for.inc.2.2: ; preds = %for.body9.2.2 for.body9.2.3: ; preds = %for.body9.2.3, %for.inc.2.2 %i25 = phi ptr [ %incdec.ptr.2.3, %for.body9.2.3 ], [ %incdec.ptr.2.2, %for.inc.2.2 ] %incdec.ptr.2.3 = getelementptr inbounds ptr, ptr %i25, i64 -1 - br i1 undef, label %for.body9.2.3, label %for.inc.2.3 + br i1 %arg, label %for.body9.2.3, label %for.inc.2.3 for.inc.2.3: ; preds = %for.body9.2.3 br label %for.body9.2.4 @@ -201,7 +201,7 @@ for.inc.2.3: ; preds = %for.body9.2.3 for.body9.2.4: ; preds = %for.body9.2.4, %for.inc.2.3 %i26 = phi ptr [ %incdec.ptr.2.4, %for.body9.2.4 ], [ %incdec.ptr.2.3, %for.inc.2.3 ] %incdec.ptr.2.4 = getelementptr inbounds ptr, ptr %i26, i64 -1 - br i1 undef, label %for.body9.2.4, label %for.inc.2.4 + br i1 %arg, label %for.body9.2.4, label %for.inc.2.4 for.inc.2.4: ; preds = %for.body9.2.4 br label %for.body9.2.5 @@ -209,7 +209,7 @@ for.inc.2.4: ; preds = %for.body9.2.4 for.body9.2.5: ; preds = %for.body9.2.5, %for.inc.2.4 %i27 = phi ptr [ %incdec.ptr.2.5, %for.body9.2.5 ], [ %incdec.ptr.2.4, %for.inc.2.4 ] %incdec.ptr.2.5 = getelementptr inbounds ptr, ptr %i27, i64 -1 - br i1 undef, label %for.body9.2.5, label %for.inc.2.5 + br i1 %arg, label %for.body9.2.5, label %for.inc.2.5 for.inc.2.5: ; preds = %for.body9.2.5 br label %for.body9.3 @@ -217,7 +217,7 @@ for.inc.2.5: ; preds = %for.body9.2.5 for.body9.1.1: ; preds = %for.body9.1.1, %for.inc.1 %i28 = phi ptr [ %incdec.ptr.1.1, %for.body9.1.1 ], [ %incdec.ptr.1, %for.inc.1 ] %incdec.ptr.1.1 = getelementptr inbounds ptr, ptr %i28, i64 -1 - br i1 undef, label %for.body9.1.1, label %for.inc.1.1 + br i1 %arg, label %for.body9.1.1, label %for.inc.1.1 for.inc.1.1: ; preds = %for.body9.1.1 br label %for.body9.1.2 @@ -225,7 +225,7 @@ for.inc.1.1: ; preds = 
%for.body9.1.1 for.body9.1.2: ; preds = %for.body9.1.2, %for.inc.1.1 %i29 = phi ptr [ %incdec.ptr.1.2, %for.body9.1.2 ], [ %incdec.ptr.1.1, %for.inc.1.1 ] %incdec.ptr.1.2 = getelementptr inbounds ptr, ptr %i29, i64 -1 - br i1 undef, label %for.body9.1.2, label %for.inc.1.2 + br i1 %arg, label %for.body9.1.2, label %for.inc.1.2 for.inc.1.2: ; preds = %for.body9.1.2 br label %for.body9.1.3 @@ -233,7 +233,7 @@ for.inc.1.2: ; preds = %for.body9.1.2 for.body9.1.3: ; preds = %for.body9.1.3, %for.inc.1.2 %i30 = phi ptr [ %incdec.ptr.1.3, %for.body9.1.3 ], [ %incdec.ptr.1.2, %for.inc.1.2 ] %incdec.ptr.1.3 = getelementptr inbounds ptr, ptr %i30, i64 -1 - br i1 undef, label %for.body9.1.3, label %for.inc.1.3 + br i1 %arg, label %for.body9.1.3, label %for.inc.1.3 for.inc.1.3: ; preds = %for.body9.1.3 br label %for.body9.1.4 @@ -241,7 +241,7 @@ for.inc.1.3: ; preds = %for.body9.1.3 for.body9.1.4: ; preds = %for.body9.1.4, %for.inc.1.3 %i31 = phi ptr [ %incdec.ptr.1.4, %for.body9.1.4 ], [ %incdec.ptr.1.3, %for.inc.1.3 ] %incdec.ptr.1.4 = getelementptr inbounds ptr, ptr %i31, i64 -1 - br i1 undef, label %for.body9.1.4, label %for.inc.1.4 + br i1 %arg, label %for.body9.1.4, label %for.inc.1.4 for.inc.1.4: ; preds = %for.body9.1.4 br label %for.body9.1.5 @@ -249,7 +249,7 @@ for.inc.1.4: ; preds = %for.body9.1.4 for.body9.1.5: ; preds = %for.body9.1.5, %for.inc.1.4 %i32 = phi ptr [ %incdec.ptr.1.5, %for.body9.1.5 ], [ %incdec.ptr.1.4, %for.inc.1.4 ] %incdec.ptr.1.5 = getelementptr inbounds ptr, ptr %i32, i64 -1 - br i1 undef, label %for.body9.1.5, label %for.inc.1.5 + br i1 %arg, label %for.body9.1.5, label %for.inc.1.5 for.inc.1.5: ; preds = %for.body9.1.5 br label %for.body9.2 @@ -257,7 +257,7 @@ for.inc.1.5: ; preds = %for.body9.1.5 for.body9.118: ; preds = %for.body9.118, %for.inc %i33 = phi ptr [ %incdec.ptr, %for.inc ], [ %incdec.ptr.114, %for.body9.118 ] %incdec.ptr.114 = getelementptr inbounds ptr, ptr %i33, i64 -1 - br i1 undef, label %for.body9.118, label %for.inc.119 + 
br i1 %arg, label %for.body9.118, label %for.inc.119 for.inc.119: ; preds = %for.body9.118 br label %for.body9.225 @@ -266,7 +266,7 @@ for.body9.225: ; preds = %for.body9.225, %for %i34 = phi ptr [ %incdec.ptr.114, %for.inc.119 ], [ %incdec.ptr.221, %for.body9.225 ] %incdec.ptr.221 = getelementptr inbounds ptr, ptr %i34, i64 -1 %i36 = load i64, ptr %i34, align 8 - br i1 undef, label %for.body9.225, label %for.inc.226 + br i1 %arg, label %for.body9.225, label %for.inc.226 for.inc.226: ; preds = %for.body9.225 br label %for.body9.332 @@ -274,7 +274,7 @@ for.inc.226: ; preds = %for.body9.225 for.body9.332: ; preds = %for.body9.332, %for.inc.226 %i37 = phi ptr [ %incdec.ptr.221, %for.inc.226 ], [ %incdec.ptr.328, %for.body9.332 ] %incdec.ptr.328 = getelementptr inbounds ptr, ptr %i37, i64 -1 - br i1 undef, label %for.body9.332, label %for.inc.333 + br i1 %arg, label %for.body9.332, label %for.inc.333 for.inc.333: ; preds = %for.body9.332 br label %for.body9.439 @@ -282,7 +282,7 @@ for.inc.333: ; preds = %for.body9.332 for.body9.439: ; preds = %for.body9.439, %for.inc.333 %i38 = phi ptr [ %incdec.ptr.328, %for.inc.333 ], [ %incdec.ptr.435, %for.body9.439 ] %incdec.ptr.435 = getelementptr inbounds ptr, ptr %i38, i64 -1 - br i1 undef, label %for.body9.439, label %for.inc.440 + br i1 %arg, label %for.body9.439, label %for.inc.440 for.inc.440: ; preds = %for.body9.439 br label %for.body9.546 @@ -290,7 +290,7 @@ for.inc.440: ; preds = %for.body9.439 for.body9.546: ; preds = %for.body9.546, %for.inc.440 %i39 = phi ptr [ %incdec.ptr.435, %for.inc.440 ], [ %incdec.ptr.542, %for.body9.546 ] %incdec.ptr.542 = getelementptr inbounds ptr, ptr %i39, i64 -1 - br i1 undef, label %for.body9.546, label %for.inc.547 + br i1 %arg, label %for.body9.546, label %for.inc.547 for.inc.547: ; preds = %for.body9.546 br label %for.body9.1 diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll 
index 737a590394e5ff8..8111eeb6ec71d59 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll @@ -59,10 +59,10 @@ for.body: br i1 false, label %loopexit, label %for.body } -define protected amdgpu_kernel void @baseregtest(i32 %n, i32 %lda) local_unnamed_addr { +define protected amdgpu_kernel void @baseregtest(i32 %n, i32 %lda, i1 %arg) local_unnamed_addr { ; CHECK-LABEL: @baseregtest( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[IF_END:%.*]] +; CHECK-NEXT: br i1 %arg, label [[EXIT:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: ; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo() ; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 3 @@ -86,7 +86,7 @@ define protected amdgpu_kernel void @baseregtest(i32 %n, i32 %lda) local_unnamed ; CHECK-NEXT: ret void ; entry: - br i1 undef, label %exit, label %if.end + br i1 %arg, label %exit, label %if.end if.end: %0 = tail call i32 @foo() diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-void-inseltpoison.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-void-inseltpoison.ll index f92e39d60717650..da502b1ffa9de05 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-void-inseltpoison.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-void-inseltpoison.ll @@ -3,7 +3,7 @@ @array = external addrspace(4) constant [32 x [800 x i32]], align 4 ; GCN-LABEL: {{^}}test_lsr_voidty: -define amdgpu_kernel void @test_lsr_voidty() { +define amdgpu_kernel void @test_lsr_voidty(i1 %arg) { entry: br label %for.body @@ -32,5 +32,5 @@ for.body.i: ; preds = %for.body.i, %for.body %reorder_shuffle2 = shufflevector <4 x i32> %tmp5, <4 x i32> poison, <4 x i32> %tmp6 = select <4 x i1> undef, <4 x i32> zeroinitializer, <4 x i32> %reorder_shuffle2 %inc14 = add nuw nsw i32 %ij, 4 - br i1 undef, label %for.body, label %for.body.i + br i1 %arg, label %for.body, label %for.body.i } diff 
--git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-void.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-void.ll index 1069c0f7f9dd072..c363e81cf3c4d9d 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-void.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-void.ll @@ -3,7 +3,7 @@ @array = external addrspace(4) constant [32 x [800 x i32]], align 4 ; GCN-LABEL: {{^}}test_lsr_voidty: -define amdgpu_kernel void @test_lsr_voidty() { +define amdgpu_kernel void @test_lsr_voidty(i1 %arg) { entry: br label %for.body @@ -32,5 +32,5 @@ for.body.i: ; preds = %for.body.i, %for.body %reorder_shuffle2 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> %tmp6 = select <4 x i1> undef, <4 x i32> zeroinitializer, <4 x i32> %reorder_shuffle2 %inc14 = add nuw nsw i32 %ij, 4 - br i1 undef, label %for.body, label %for.body.i + br i1 %arg, label %for.body, label %for.body.i } diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll index 8f1c95fd4a330b3..ffb23575879a5d2 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll @@ -48,7 +48,7 @@ bb1: ; preds = %bb17, %bb br label %bb4 bb4: ; preds = %bb1 - br i1 undef, label %bb8, label %bb5 + br i1 false, label %bb8, label %bb5 bb5: ; preds = %bb4 unreachable diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/addrec-is-loop-invariant.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/addrec-is-loop-invariant.ll index 2ddc8af4f5f9f58..cf9c1fb49cce70b 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/ARM/addrec-is-loop-invariant.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/addrec-is-loop-invariant.ll @@ -4,14 +4,14 @@ ; not create an Add Reccurence Expression if not all ; its operands are loop invariants. 
-define void @add_rec_expr() { +define void @add_rec_expr(i1 %arg) { entry: br label %loop0 loop0: %c.0 = phi i32 [ 0, %entry ], [ %inc.0, %loop0 ] %inc.0 = add nuw i32 %c.0, 1 - br i1 undef, label %loop0, label %bb1 + br i1 %arg, label %loop0, label %bb1 bb1: %mul.0 = mul i32 %c.0, %c.0 diff --git a/llvm/test/Transforms/LoopStrengthReduce/Power/incomplete-phi.ll b/llvm/test/Transforms/LoopStrengthReduce/Power/incomplete-phi.ll index 53aac1d9cf7f8df..8f7ca70935628f9 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/Power/incomplete-phi.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/Power/incomplete-phi.ll @@ -58,7 +58,7 @@ bb: bb3: ; preds = %bb18, %bb %i4 = phi i64 [ %i20, %bb18 ], [ 0, %bb ] %i5 = phi i64 [ %i21, %bb18 ], [ 1, %bb ] - br i1 undef, label %bb22, label %bb9 + br i1 true, label %bb22, label %bb9 bb9: ; preds = %bb9, %bb3 %i10 = phi i64 [ 0, %bb3 ], [ %i16, %bb9 ] @@ -70,7 +70,7 @@ bb9: ; preds = %bb9, %bb3 br i1 true, label %bb17, label %bb9 bb17: ; preds = %bb9 - br i1 undef, label %bb18, label %bb22 + br i1 false, label %bb18, label %bb22 bb18: ; preds = %bb17 %i19 = add i64 undef, %i4 diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2009-11-10-LSRCrash.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2009-11-10-LSRCrash.ll index 4032a599e8d94ae..5098030cc49e82d 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/2009-11-10-LSRCrash.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2009-11-10-LSRCrash.ll @@ -1,35 +1,35 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin11 -define void @_ZN4llvm20SelectionDAGLowering14visitInlineAsmENS_8CallSiteE() nounwind ssp align 2 { +define void @_ZN4llvm20SelectionDAGLowering14visitInlineAsmENS_8CallSiteE(i1 %arg) nounwind ssp align 2 { entry: - br i1 undef, label %bb3.i, label %bb4.i + br i1 %arg, label %bb3.i, label %bb4.i bb3.i: ; preds = %entry unreachable bb4.i: ; preds = %entry - br i1 undef, label %bb.i.i, label %_ZNK4llvm8CallSite14getCalledValueEv.exit + br i1 %arg, label %bb.i.i, label 
%_ZNK4llvm8CallSite14getCalledValueEv.exit bb.i.i: ; preds = %bb4.i unreachable _ZNK4llvm8CallSite14getCalledValueEv.exit: ; preds = %bb4.i - br i1 undef, label %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit, label %bb6.i + br i1 %arg, label %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit, label %bb6.i bb6.i: ; preds = %_ZNK4llvm8CallSite14getCalledValueEv.exit unreachable _ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit: ; preds = %_ZNK4llvm8CallSite14getCalledValueEv.exit - br i1 undef, label %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit, label %bb.i + br i1 %arg, label %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit, label %bb.i bb.i: ; preds = %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit br label %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit _ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit: ; preds = %bb.i, %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit - br i1 undef, label %bb50, label %bb27 + br i1 %arg, label %bb50, label %bb27 bb27: ; preds = %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit - br i1 undef, label %bb1.i727, label %bb.i.i726 + br i1 %arg, label %bb1.i727, label %bb.i.i726 bb.i.i726: ; preds = %bb27 unreachable @@ -41,7 +41,7 @@ bb50: ; preds = %_ZL25hasInlineAsmMe br label %bb107 bb51: ; preds = %bb107 - br i1 undef, label %bb105, label %bb106 + br i1 %arg, label %bb105, label %bb106 bb105: ; preds = %bb51 unreachable @@ -50,16 +50,16 @@ bb106: ; preds = %bb51 br label %bb107 bb107: ; preds = %bb106, %bb50 - br i1 undef, label %bb108, label 
%bb51 + br i1 %arg, label %bb108, label %bb51 bb108: ; preds = %bb107 - br i1 undef, label %bb242, label %bb114 + br i1 %arg, label %bb242, label %bb114 bb114: ; preds = %bb108 - br i1 undef, label %bb141, label %bb116 + br i1 %arg, label %bb141, label %bb116 bb116: ; preds = %bb114 - br i1 undef, label %bb120, label %bb121 + br i1 %arg, label %bb120, label %bb121 bb120: ; preds = %bb116 unreachable @@ -68,7 +68,7 @@ bb121: ; preds = %bb116 unreachable bb141: ; preds = %bb114 - br i1 undef, label %bb182, label %bb143 + br i1 %arg, label %bb182, label %bb143 bb143: ; preds = %bb141 br label %bb157 @@ -99,7 +99,7 @@ bb6.i841: ; preds = %bb157 unreachable _ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit: ; preds = %bb157, %bb157 - br i1 undef, label %bb.i.i.i843, label %bb1.i.i.i844 + br i1 %arg, label %bb.i.i.i843, label %bb1.i.i.i844 bb.i.i.i843: ; preds = %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit br i1 %0, label %bb158, label %bb144 @@ -108,13 +108,13 @@ bb1.i.i.i844: ; preds = %_ZN4llvm4castINS_14 unreachable bb158: ; preds = %bb.i.i.i843 - br i1 undef, label %bb177, label %bb176 + br i1 %arg, label %bb177, label %bb176 bb176: ; preds = %bb158 unreachable bb177: ; preds = %bb158 - br i1 undef, label %bb179, label %bb178 + br i1 %arg, label %bb179, label %bb178 bb178: ; preds = %bb177 unreachable diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll index 0fc928ca9b28833..f41ff1bd76438f0 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll @@ -21,7 +21,7 @@ loop: %i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ] %conv = sitofp i32 %i.01 to double %inc = add nsw i32 %i.01, 1 - br i1 undef, label %loop, label %for.end + br i1 true, label %loop, label %for.end for.end: unreachable @@ 
-40,7 +40,7 @@ loop: %conv = sitofp i32 %i.01 to double %div = fdiv double %conv, 4.000000e+01 %inc = add nsw i32 %i.01, 1 - br i1 undef, label %loop, label %for.end + br i1 false, label %loop, label %for.end for.end: unreachable diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll index 38f18f68e2db72c..f780bee7874cfb0 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll @@ -88,10 +88,10 @@ exit: ; preds = %cond.true29.i, %cond.true.i ; Test phi reuse after LSR that requires SCEVExpander to hoist an ; interesting GEP. ; -define void @test2(i32 %n) nounwind uwtable { +define void @test2(i32 %n, i1 %arg) nounwind uwtable { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[WHILE_END:%.*]], label [[FOR_COND468_PREHEADER:%.*]] +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[WHILE_END:%.*]], label [[FOR_COND468_PREHEADER:%.*]] ; CHECK: for.cond468.preheader: ; CHECK-NEXT: br label [[FOR_COND468:%.*]] ; CHECK: for.cond468: @@ -103,12 +103,12 @@ define void @test2(i32 %n) nounwind uwtable { ; CHECK: for.body471: ; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[LSR_IV]], i64 8 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCEVGEP2]], align 4 -; CHECK-NEXT: br i1 false, label [[IF_THEN477]], label [[FOR_INC498_PREHEADER]] +; CHECK-NEXT: br i1 [[ARG]], label [[IF_THEN477]], label [[FOR_INC498_PREHEADER]] ; CHECK: for.inc498.preheader: ; CHECK-NEXT: br label [[FOR_INC498:%.*]] ; CHECK: if.then477: ; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 12 -; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i32 [[LSR_IV1]], 1 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw i32 [[LSR_IV1]], 1 ; CHECK-NEXT: br label [[FOR_COND468]] ; CHECK: for.inc498: ; CHECK-NEXT: br label [[FOR_INC498]] @@ -116,7 +116,7 @@ define void @test2(i32 %n) nounwind uwtable { ; 
CHECK-NEXT: ret void ; entry: - br i1 undef, label %while.end, label %for.cond468 + br i1 %arg, label %while.end, label %for.cond468 for.cond468: ; preds = %if.then477, %entry %indvars.iv1163 = phi i64 [ %indvars.iv.next1164, %if.then477 ], [ 1, %entry ] @@ -129,7 +129,7 @@ for.cond468: ; preds = %if.then477, %entry for.body471: ; preds = %for.cond468 %first = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], ptr @tags, i64 0, i64 %indvars.iv1163, i32 1 %1 = load i32, ptr %first, align 4 - br i1 undef, label %if.then477, label %for.inc498 + br i1 %arg, label %if.then477, label %for.inc498 if.then477: ; preds = %for.body471 %last = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], ptr @tags, i64 0, i64 %indvars.iv1163, i32 2 @@ -147,21 +147,21 @@ while.end: ; preds = %entry ; Test redundant phi elimination when the deleted phi's increment is ; itself a phi. 
; -define fastcc void @test3(ptr nocapture %u) nounwind uwtable ssp { +define fastcc void @test3(ptr nocapture %u, i1 %arg) nounwind uwtable ssp { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[MESHBB1_PREHEADER:%.*]], label [[MESHBB5:%.*]] +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[MESHBB1_PREHEADER:%.*]], label [[MESHBB5:%.*]] ; CHECK: meshBB1.preheader: ; CHECK-NEXT: br label [[MESHBB1:%.*]] ; CHECK: for.inc8.us.i: -; CHECK-NEXT: br i1 true, label [[MESHBB1_LOOPEXIT:%.*]], label [[MESHBB:%.*]] +; CHECK-NEXT: br i1 [[ARG]], label [[MESHBB1_LOOPEXIT:%.*]], label [[MESHBB:%.*]] ; CHECK: for.body3.us.i: ; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[MESHBB]] ], [ [[TMP3:%.*]], [[FOR_BODY3_LR_PH_US_I:%.*]] ] ; CHECK-NEXT: [[SCEVGEP:%.*]] = phi ptr [ [[SCEVGEP1:%.*]], [[MESHBB]] ], [ [[U:%.*]], [[FOR_BODY3_LR_PH_US_I]] ] ; CHECK-NEXT: [[OPQ_SA_CALC12:%.*]] = sub i32 undef, 227 ; CHECK-NEXT: [[MUL_I_US_I:%.*]] = mul nsw i32 0, [[TMP]] ; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[SCEVGEP]], align 8 -; CHECK-NEXT: br i1 undef, label [[FOR_INC8_US_I:%.*]], label [[MESHBB]] +; CHECK-NEXT: br i1 [[ARG]], label [[FOR_INC8_US_I:%.*]], label [[MESHBB]] ; CHECK: for.body3.lr.ph.us.i.loopexit: ; CHECK-NEXT: br label [[FOR_BODY3_LR_PH_US_I]] ; CHECK: for.body3.lr.ph.us.i: @@ -179,19 +179,19 @@ define fastcc void @test3(ptr nocapture %u) nounwind uwtable ssp { ; CHECK-NEXT: [[MESHSTACKVARIABLE_PHI:%.*]] = phi i32 [ [[OPQ_SA_CALC12]], [[FOR_BODY3_US_I]] ], [ undef, [[FOR_INC8_US_I]] ] ; CHECK-NEXT: [[SCEVGEP1]] = getelementptr i8, ptr [[SCEVGEP]], i64 8 ; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[TMP]], 1 -; CHECK-NEXT: br i1 true, label [[FOR_BODY3_LR_PH_US_I_LOOPEXIT]], label [[FOR_BODY3_US_I]] +; CHECK-NEXT: br i1 [[ARG]], label [[FOR_BODY3_LR_PH_US_I_LOOPEXIT]], label [[FOR_BODY3_US_I]] ; CHECK: meshBB1.loopexit: ; CHECK-NEXT: br label [[MESHBB1]] ; CHECK: meshBB1: ; CHECK-NEXT: br label [[FOR_BODY3_LR_PH_US_I]] ; CHECK: meshBB5: 
-; CHECK-NEXT: br i1 undef, label [[EVAL_AT_TIMES_U_EXIT:%.*]], label [[FOR_INC8_US_I2:%.*]] +; CHECK-NEXT: br i1 [[ARG]], label [[EVAL_AT_TIMES_U_EXIT:%.*]], label [[FOR_INC8_US_I2:%.*]] ; entry: - br i1 undef, label %meshBB1, label %meshBB5 + br i1 %arg, label %meshBB1, label %meshBB5 for.inc8.us.i: ; preds = %for.body3.us.i - br i1 undef, label %meshBB1, label %meshBB + br i1 %arg, label %meshBB1, label %meshBB for.body3.us.i: ; preds = %meshBB, %for.body3.lr.ph.us.i %indvars.iv.i.SV.phi = phi i64 [ %indvars.iv.next.i, %meshBB ], [ 0, %for.body3.lr.ph.us.i ] @@ -203,7 +203,7 @@ for.body3.us.i: ; preds = %meshBB, %for.body3. %arrayidx5.us.i = getelementptr inbounds double, ptr %u, i64 %indvars.iv.i.SV.phi %2 = load double, ptr %arrayidx5.us.i, align 8 %indvars.iv.next.i = add i64 %indvars.iv.i.SV.phi, 1 - br i1 undef, label %for.inc8.us.i, label %meshBB + br i1 %arg, label %for.inc8.us.i, label %meshBB for.body3.lr.ph.us.i: ; preds = %meshBB1, %meshBB %indvars.iv8.i.SV.phi26 = phi i64 [ undef, %meshBB1 ], [ %indvars.iv8.i.SV.phi24, %meshBB ] @@ -220,11 +220,11 @@ eval_At_times_u.exit: ; preds = %meshBB5 meshBB: ; preds = %for.body3.us.i, %for.inc8.us.i %indvars.iv8.i.SV.phi24 = phi i64 [ undef, %for.body3.us.i ], [ %3, %for.inc8.us.i ] %meshStackVariable.phi = phi i32 [ %Opq.sa.calc12, %for.body3.us.i ], [ undef, %for.inc8.us.i ] - br i1 undef, label %for.body3.lr.ph.us.i, label %for.body3.us.i + br i1 %arg, label %for.body3.lr.ph.us.i, label %for.body3.us.i meshBB1: ; preds = %for.inc8.us.i, %entry br label %for.body3.lr.ph.us.i meshBB5: ; preds = %entry - br i1 undef, label %eval_At_times_u.exit, label %for.inc8.us.i2 + br i1 %arg, label %eval_At_times_u.exit, label %for.inc8.us.i2 } diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll index 8898ea85b2223b4..6627e4a1a0c1f97 100644 --- 
a/llvm/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll @@ -2,7 +2,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -define void @indvar_expansion(ptr nocapture readonly %rowsptr) { +define void @indvar_expansion(ptr nocapture readonly %rowsptr, i1 %arg) { entry: br label %for.cond @@ -22,7 +22,7 @@ for.cond: br i1 %cmp, label %for.cond, label %for.cond2 for.cond2: - br i1 undef, label %for.cond2, label %for.body14.lr.ph + br i1 %arg, label %for.cond2, label %for.body14.lr.ph for.body14.lr.ph: %sext = shl i64 %indvars.iv44, 32 diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll index a6bff63dfc7158a..a444e31abbcb9f6 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll @@ -50,7 +50,7 @@ bb10: ; preds = %bb10, %bb %tmp22 = shl i64 %tmp21, 1 %tmp23 = mul i64 %tmp22, %tmp22 %tmp24 = add nuw nsw i64 %tmp11, 1 - br i1 undef, label %bb1, label %bb10 + br i1 true, label %bb1, label %bb10 } diff --git a/llvm/test/Transforms/LoopStrengthReduce/callbr-critical-edge-splitting.ll b/llvm/test/Transforms/LoopStrengthReduce/callbr-critical-edge-splitting.ll index 58f9d7fd02eb583..e7afc96c72d5bb1 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/callbr-critical-edge-splitting.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/callbr-critical-edge-splitting.ll @@ -69,11 +69,11 @@ cond.true.i: ; preds = %for.cond do.body.i.i.do.body.i.i_crit_edge: ; preds = %do.body.i.i.do.body.i.i_crit_edge, %cond.true.i %pgocount711 = phi i64 [ %0, %do.body.i.i.do.body.i.i_crit_edge ], [ 0, %cond.true.i ] %0 = add nuw nsw i64 %pgocount711, 1 - br i1 undef, label %do.body.i.i.rdrand_int.exit.i_crit_edge, label %do.body.i.i.do.body.i.i_crit_edge + br i1 true, label %do.body.i.i.rdrand_int.exit.i_crit_edge, label 
%do.body.i.i.do.body.i.i_crit_edge do.body.i.i.rdrand_int.exit.i_crit_edge: ; preds = %do.body.i.i.do.body.i.i_crit_edge %1 = add i64 %0, undef - br i1 undef, label %for.end, label %for.inc + br i1 true, label %for.end, label %for.inc for.inc: ; preds = %do.body.i.i.rdrand_int.exit.i_crit_edge br label %for.cond diff --git a/llvm/test/Transforms/LoopStrengthReduce/dominate-assert.ll b/llvm/test/Transforms/LoopStrengthReduce/dominate-assert.ll index 4771dd5988a558c..9bb3ce88eb37ee6 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/dominate-assert.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/dominate-assert.ll @@ -4,13 +4,13 @@ declare ptr @_Znwm() declare i32 @__gxx_personality_v0(...) declare void @g() -define void @f() personality ptr @__gxx_personality_v0 { +define void @f(i1 %arg) personality ptr @__gxx_personality_v0 { bb0: br label %bb1 bb1: %v0 = phi i64 [ 0, %bb0 ], [ %v1, %bb1 ] %v1 = add nsw i64 %v0, 1 - br i1 undef, label %bb2, label %bb1 + br i1 %arg, label %bb2, label %bb1 bb2: %v2 = icmp eq i64 %v0, 0 br i1 %v2, label %bb6, label %bb3 @@ -69,34 +69,34 @@ bb7: } ; PR17425 -define void @i() { +define void @i(i1 %arg) { entry: br label %while.cond while.cond: ; preds = %while.cond, %entry %c.0 = phi ptr [ undef, %entry ], [ %incdec.ptr, %while.cond ] %incdec.ptr = getelementptr inbounds i16, ptr %c.0, i64 1 - br i1 undef, label %while.cond1, label %while.cond + br i1 %arg, label %while.cond1, label %while.cond while.cond1: ; preds = %while.cond1, %while.cond %c.1 = phi ptr [ %incdec.ptr5, %while.cond1 ], [ %c.0, %while.cond ] %incdec.ptr5 = getelementptr inbounds i16, ptr %c.1, i64 1 - br i1 undef, label %while.cond7, label %while.cond1 + br i1 %arg, label %while.cond7, label %while.cond1 while.cond7: ; preds = %while.cond7, %while.cond1 %0 = phi ptr [ %incdec.ptr10, %while.cond7 ], [ %c.1, %while.cond1 ] %incdec.ptr10 = getelementptr inbounds i16, ptr %0, i64 1 - br i1 undef, label %while.cond12.preheader, label %while.cond7 + br i1 %arg, label 
%while.cond12.preheader, label %while.cond7 while.cond12.preheader: ; preds = %while.cond7 - br i1 undef, label %while.end16, label %while.body13.lr.ph + br i1 %arg, label %while.end16, label %while.body13.lr.ph while.body13: ; preds = %if.else, %while.body13.lr.ph %1 = phi ptr [ %2, %while.body13.lr.ph ], [ %incdec.ptr15, %if.else ] - br i1 undef, label %while.cond12.outer.loopexit, label %if.else + br i1 %arg, label %while.cond12.outer.loopexit, label %if.else while.cond12.outer.loopexit: ; preds = %while.body13 - br i1 undef, label %while.end16, label %while.body13.lr.ph + br i1 %arg, label %while.end16, label %while.body13.lr.ph while.body13.lr.ph: ; preds = %while.cond12.outer.loopexit, %while.cond12.preheader %2 = phi ptr [ %1, %while.cond12.outer.loopexit ], [ undef, %while.cond12.preheader ] diff --git a/llvm/test/Transforms/LoopStrengthReduce/funclet.ll b/llvm/test/Transforms/LoopStrengthReduce/funclet.ll index 8ba81e75618c75a..da5721a72a90638 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/funclet.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/funclet.ll @@ -63,7 +63,7 @@ loop_body: ; preds = %iter, %pad iter: ; preds = %loop_body %tmp101 = getelementptr inbounds i8, ptr %tmp99, i32 1 - br i1 undef, label %unwind_out, label %loop_body + br i1 true, label %unwind_out, label %loop_body unwind_out: ; preds = %iter, %loop_body cleanupret from %cleanuppadi4.i.i.i unwind to caller @@ -130,7 +130,7 @@ loop_body: ; preds = %iter, %pad iter: ; preds = %loop_body %tmp101 = getelementptr inbounds i8, ptr %tmp99, i32 1 - br i1 undef, label %unwind_out, label %loop_body + br i1 true, label %unwind_out, label %loop_body } define void @h() personality ptr @_except_handler3 { @@ -194,7 +194,7 @@ loop_body: ; preds = %iter, %pad iter: ; preds = %loop_body %tmp101 = getelementptr inbounds i8, ptr %tmp99, i32 1 - br i1 undef, label %unwind_out, label %loop_body + br i1 true, label %unwind_out, label %loop_body } define void @i() personality ptr @_except_handler3 { @@ 
-255,7 +255,7 @@ loop_body: ; preds = %iter, %catchpad iter: ; preds = %loop_body %tmp101 = getelementptr inbounds i8, ptr %tmp99, i32 1 - br i1 undef, label %unwind_out, label %loop_body + br i1 true, label %unwind_out, label %loop_body unwind_out: ; preds = %iter, %loop_body unreachable diff --git a/llvm/test/Transforms/LoopStrengthReduce/hoist-parent-preheader.ll b/llvm/test/Transforms/LoopStrengthReduce/hoist-parent-preheader.ll index c80a6a5a84351cd..fa8df738bfe0d9a 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/hoist-parent-preheader.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/hoist-parent-preheader.ll @@ -1,9 +1,9 @@ ; RUN: opt < %s -loop-reduce -verify target triple = "x86_64-apple-darwin10" -define void @myquicksort(ptr %a) nounwind ssp { +define void @myquicksort(ptr %a, i1 %arg) nounwind ssp { entry: - br i1 undef, label %loop1, label %return + br i1 %arg, label %loop1, label %return loop1: ; preds = %bb13.loopexit, %entry %indvar419 = phi i64 [ %indvar.next420, %loop2.exit ], [ 0, %entry ] @@ -25,7 +25,7 @@ loop2.backedge: ; preds = %loop2 loop2.exit: ; preds = %loop2 %indvar.next420 = add i64 %indvar419, 1 - br i1 undef, label %loop1, label %return + br i1 %arg, label %loop1, label %return return: ; preds = %loop2.exit, %entry ret void diff --git a/llvm/test/Transforms/LoopStrengthReduce/ivchain.ll b/llvm/test/Transforms/LoopStrengthReduce/ivchain.ll index 0c1dce52d08766b..f1c99386b84f608 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/ivchain.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/ivchain.ll @@ -12,11 +12,11 @@ target datalayout = "n8:16:32:64" %struct = type { ptr, ptr, i16, i64, i16, i16, i16, i64, i64, i16, ptr, i64, i64, i64 } -define i32 @test(ptr %h, i32 %more) nounwind uwtable { +define i32 @test(ptr %h, i32 %more, i1 %arg) nounwind uwtable { ; CHECK-LABEL: define i32 @test -; CHECK-SAME: (ptr [[H:%.*]], i32 [[MORE:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (ptr [[H:%.*]], i32 [[MORE:%.*]], i1 [[ARG:%.*]]) 
#[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[LAND_END238:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: br i1 [[ARG]], label [[LAND_END238:%.*]], label [[RETURN:%.*]] ; CHECK: land.end238: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -38,7 +38,7 @@ define i32 @test(ptr %h, i32 %more) nounwind uwtable { ; CHECK-NEXT: ret i32 1 ; entry: - br i1 undef, label %land.end238, label %return + br i1 %arg, label %land.end238, label %return land.end238: ; preds = %if.end229 br label %for.body diff --git a/llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll b/llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll index 1c29331a9ac3812..6c0eb9bb4995d35 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll @@ -46,7 +46,7 @@ top: L86: ; preds = %L86, %top %i.0 = phi i64 [ 0, %top ], [ %tmp, %L86 ] %tmp = add i64 %i.0, 1 - br i1 undef, label %L86, label %if29 + br i1 false, label %L86, label %if29 if29: ; preds = %L86 %tmp1 = shl i64 %tmp, 1 @@ -60,13 +60,13 @@ if31: ; preds = %if38, %if29 L119: ; preds = %L119, %if31 %i5.0 = phi i64 [ %"#temp#1.sroa.0.022", %if31 ], [ %tmp3, %L119 ] %tmp3 = add i64 %i5.0, 1 - br i1 undef, label %L119, label %if38 + br i1 false, label %L119, label %if38 if38: ; preds = %L119 %tmp4 = add i64 %tmp2, %i5.0 %tmp5 = getelementptr i64, ptr addrspace(10) %arg, i64 %tmp4 %tmp6 = load i64, ptr addrspace(10) %tmp5 - br i1 undef, label %done, label %if31 + br i1 true, label %done, label %if31 done: ; preds = %if38 ret void diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr12048.ll b/llvm/test/Transforms/LoopStrengthReduce/pr12048.ll index 6017f8ca5927d7e..74ab16544a24b85 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/pr12048.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/pr12048.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -loop-reduce -define void @resolve_name() nounwind uwtable ssp { +define void @resolve_name(i1 %arg, i8 %arg2) 
nounwind uwtable ssp { br label %while.cond40.preheader while.cond132.while.cond.loopexit_crit_edge: br label %while.cond40.preheader @@ -9,7 +9,7 @@ while.cond40.preheader: while.cond40: %indvars.iv194 = phi ptr [ null, %while.cond40.preheader ], [ %scevgep, %while.body51 ] %tmp.1 = phi ptr [ undef, %while.cond40.preheader ], [ %incdec.ptr, %while.body51 ] - switch i8 undef, label %while.body51 [ + switch i8 %arg2, label %while.body51 [ i8 0, label %if.then59 ] while.body51: ; preds = %land.end50 @@ -17,7 +17,7 @@ while.body51: ; preds = %land.end50 %scevgep = getelementptr i8, ptr %indvars.iv194, i64 1 br label %while.cond40 if.then59: ; preds = %while.end - br i1 undef, label %if.then64, label %if.end113 + br i1 %arg, label %if.then64, label %if.end113 if.then64: ; preds = %if.then59 %incdec.ptr88.tmp.2 = select i1 undef, ptr undef, ptr undef br label %if.end113 @@ -33,5 +33,5 @@ while.body139.lr.ph: ; preds = %while.cond132.prehe br label %while.body139 while.body139: ; preds = %while.body139, %while.body139.lr.ph %start_of_var.0177 = phi ptr [ %tmp.1, %while.body139.lr.ph ], [ null, %while.body139 ] - br i1 undef, label %while.cond132.while.cond.loopexit_crit_edge, label %while.body139 + br i1 %arg, label %while.cond132.while.cond.loopexit_crit_edge, label %while.body139 } diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr50765.ll b/llvm/test/Transforms/LoopStrengthReduce/pr50765.ll index 5b4e5ed0679bb3a..1dae1902152f3bf 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/pr50765.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/pr50765.ll @@ -5,7 +5,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" target triple = "x86_64-unknown-linux-gnu" -define void @test() { +define void @test(i1 %arg) { ; CHECK-LABEL: test bb: %tmp = load i32, ptr addrspace(3) undef, align 4 @@ -17,7 +17,7 @@ bb1: ; preds = %bb38, %bb %tmp4 = add i32 %tmp3, 1 %tmp5 = call i32 @llvm.smax.i32(i32 %tmp4, i32 74) 
%tmp6 = add nuw nsw i64 %tmp2, 1 - br i1 undef, label %bb7, label %bb38 + br i1 %arg, label %bb7, label %bb38 bb7: ; preds = %bb1 %tmp8 = trunc i64 %tmp6 to i32 diff --git a/llvm/test/Transforms/LoopStrengthReduce/scaling-factor-incompat-type.ll b/llvm/test/Transforms/LoopStrengthReduce/scaling-factor-incompat-type.ll index 8cf4f8e9c129f4e..b76f29ad9e6516d 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/scaling-factor-incompat-type.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/scaling-factor-incompat-type.ll @@ -54,5 +54,5 @@ bb11: ; preds = %bb4 bb13: ; preds = %bb4 %t14 = add nuw nsw i64 %t5, 6 - br i1 undef, label %bb1, label %bb4 + br i1 true, label %bb1, label %bb4 } diff --git a/llvm/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll b/llvm/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll index 345606cfcd97523..f22a5ef6fcbc473 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll @@ -4,41 +4,41 @@ target triple = "i686-pc-win32" ; Assertion failed: (CurScaleCost >= 0 && "Legal addressing mode has an illegal cost!") ; CHECK-LABEL: @scalingFactorCrash( -define void @scalingFactorCrash() { - br i1 undef, label %1, label %24 +define void @scalingFactorCrash(i1 %arg) { + br i1 %arg, label %1, label %24 ;