llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-backend-systemz Author: Sayan Sivakumaran (sivakusayan) <details> <summary>Changes</summary> Previously, arguments with `ABIArgInfo::Indirect` would not be given alignment information if they weren't `byval`. This could prevent certain optimizations, such as those guarded by `isDereferenceableAndAlignedPointer` checks. The missing dereferenceability information is pointed out in https://github.com/llvm/llvm-project/issues/129337, for example. This PR will fix the first half of this problem, which is emitting alignment information. Dereferenceability information will be added in a subsequent PR. --- Patch is 2.18 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/201999.diff 125 Files Affected: - (modified) clang/lib/CodeGen/CGCall.cpp (+2-6) - (modified) clang/test/CodeGen/64bit-swiftcall.c (+1-1) - (modified) clang/test/CodeGen/AArch64/args.cpp (+1-1) - (modified) clang/test/CodeGen/AArch64/byval-temp.c (+8-8) - (modified) clang/test/CodeGen/AArch64/pure-scalable-args-empty-union.c (+1-1) - (modified) clang/test/CodeGen/AArch64/pure-scalable-args.c (+22-22) - (modified) clang/test/CodeGen/AArch64/struct-coerce-using-ptr.cpp (+10-10) - (modified) clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c (+3-3) - (modified) clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp (+1-1) - (modified) clang/test/CodeGen/LoongArch/bitint.c (+3-3) - (modified) clang/test/CodeGen/LoongArch/lasx/builtin-alias.c (+733-733) - (modified) clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c (+4-4) - (modified) clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c (+4-4) - (modified) clang/test/CodeGen/LoongArch/lasx/builtin.c (+733-733) - (modified) clang/test/CodeGen/PowerPC/ppc64-vector.c (+1-1) - (modified) clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c (+2-2) - (modified) 
clang/test/CodeGen/RISCV/bitint.c (+24-24) - (modified) clang/test/CodeGen/RISCV/riscv-abi.cpp (+4-4) - (modified) clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c (+5-5) - (modified) clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp (+5-5) - (modified) clang/test/CodeGen/RISCV/riscv32-abi.c (+37-37) - (modified) clang/test/CodeGen/RISCV/riscv32-vararg.c (+1-1) - (modified) clang/test/CodeGen/RISCV/riscv64-abi.c (+9-9) - (modified) clang/test/CodeGen/RISCV/riscv64-vararg.c (+1-1) - (modified) clang/test/CodeGen/Sparc/sparcv9-abi.c (+3-3) - (modified) clang/test/CodeGen/SystemZ/sync-builtins-i128-16Al.c (+1-1) - (modified) clang/test/CodeGen/SystemZ/systemz-abi-vector.c (+46-46) - (modified) clang/test/CodeGen/SystemZ/systemz-abi.c (+19-19) - (modified) clang/test/CodeGen/SystemZ/systemz-inline-asm.c (+1-1) - (modified) clang/test/CodeGen/SystemZ/zos-abi.c (+1-1) - (modified) clang/test/CodeGen/X86/cx-complex-range.c (+7-7) - (modified) clang/test/CodeGen/X86/x86_32-arguments-win32.c (+7-7) - (modified) clang/test/CodeGen/X86/x86_64-arguments-win32.c (+1-1) - (modified) clang/test/CodeGen/aapcs64-align.cpp (+2-2) - (modified) clang/test/CodeGen/arm-aapcs-vfp.c (+1-1) - (modified) clang/test/CodeGen/arm-abi-vector.c (+3-3) - (modified) clang/test/CodeGen/arm-swiftcall.c (+1-1) - (modified) clang/test/CodeGen/arm64-abi-vector.c (+7-7) - (modified) clang/test/CodeGen/arm64-arguments.c (+13-13) - (modified) clang/test/CodeGen/arm64-microsoft-arguments.cpp (+1-1) - (modified) clang/test/CodeGen/arm64ec-varargs.c (+1-1) - (modified) clang/test/CodeGen/armv7k-abi.c (+1-1) - (modified) clang/test/CodeGen/atomic-arm64.c (+1-1) - (modified) clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c (+2-2) - (modified) clang/test/CodeGen/attr-noundef.cpp (+3-3) - (modified) clang/test/CodeGen/cx-complex-range.c (+44-44) - (modified) clang/test/CodeGen/ext-int-cc.c (+18-18) - (modified) clang/test/CodeGen/isfpclass.c (+1-1) - (modified) 
clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c (+6-6) - (modified) clang/test/CodeGen/mingw-long-double.c (+3-3) - (modified) clang/test/CodeGen/ms_abi.c (+2-2) - (modified) clang/test/CodeGen/pass-by-value-noalias.c (+2-2) - (modified) clang/test/CodeGen/ptrauth-in-c-struct.c (+2-2) - (modified) clang/test/CodeGen/regcall.c (+12-11) - (modified) clang/test/CodeGen/regcall4.c (+12-11) - (modified) clang/test/CodeGen/vectorcall.c (+23-23) - (modified) clang/test/CodeGen/win-fp128.c (+1-1) - (modified) clang/test/CodeGen/win64-i128.c (+2-2) - (modified) clang/test/CodeGen/windows-seh-arg-capture-crash.cpp (+1-1) - (modified) clang/test/CodeGen/windows-swiftcall.c (+1-1) - (modified) clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp (+2-2) - (modified) clang/test/CodeGenCXX/aix-alignment.cpp (+1-1) - (modified) clang/test/CodeGenCXX/amdgcn-func-arg.cpp (+3-3) - (modified) clang/test/CodeGenCXX/arm-cc.cpp (+1-1) - (modified) clang/test/CodeGenCXX/arm-swiftcall.cpp (+1-1) - (modified) clang/test/CodeGenCXX/attr-target-mv-inalloca.cpp (+4-4) - (modified) clang/test/CodeGenCXX/blocks.cpp (+1-1) - (modified) clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp (+10-10) - (modified) clang/test/CodeGenCXX/copy-initialization.cpp (+1-1) - (modified) clang/test/CodeGenCXX/cxx1z-copy-omission.cpp (+1-1) - (modified) clang/test/CodeGenCXX/derived-to-base-conv.cpp (+2-2) - (modified) clang/test/CodeGenCXX/empty-nontrivially-copyable.cpp (+2-2) - (modified) clang/test/CodeGenCXX/fastcall.cpp (+1-1) - (modified) clang/test/CodeGenCXX/homogeneous-aggregates.cpp (+7-7) - (modified) clang/test/CodeGenCXX/inalloca-lambda.cpp (+3-3) - (modified) clang/test/CodeGenCXX/inalloca-overaligned.cpp (+4-4) - (modified) clang/test/CodeGenCXX/inalloca-vector.cpp (+2-2) - (modified) clang/test/CodeGenCXX/inheriting-constructor.cpp (+4-4) - (modified) clang/test/CodeGenCXX/member-function-pointer-calls.cpp (+2-2) - (modified) clang/test/CodeGenCXX/microsoft-abi-arg-order.cpp (+2-2) - 
(modified) clang/test/CodeGenCXX/microsoft-abi-byval-thunks.cpp (+6-6) - (modified) clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp (+3-3) - (modified) clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp (+13-13) - (modified) clang/test/CodeGenCXX/microsoft-abi-unknown-arch.cpp (+1-1) - (modified) clang/test/CodeGenCXX/ms-property.cpp (+2-2) - (modified) clang/test/CodeGenCXX/nrvo.cpp (+5-5) - (modified) clang/test/CodeGenCXX/pass-by-value-noalias.cpp (+6-6) - (modified) clang/test/CodeGenCXX/powerpc-byval.cpp (+1-1) - (modified) clang/test/CodeGenCXX/ptrauth-qualifier-struct.cpp (+1-1) - (modified) clang/test/CodeGenCXX/regcall.cpp (+3-3) - (modified) clang/test/CodeGenCXX/regcall4.cpp (+3-3) - (modified) clang/test/CodeGenCXX/regparm.cpp (+1-1) - (modified) clang/test/CodeGenCXX/temporaries.cpp (+1-1) - (modified) clang/test/CodeGenCXX/trivial_abi.cpp (+4-4) - (modified) clang/test/CodeGenCXX/uncopyable-args.cpp (+16-16) - (modified) clang/test/CodeGenCXX/wasm-args-returns.cpp (+6-6) - (modified) clang/test/CodeGenCXX/windows-x86-swiftcall.cpp (+2-2) - (modified) clang/test/CodeGenCXX/x86_32-arguments.cpp (+2-2) - (modified) clang/test/CodeGenCoroutines/coro-params.cpp (+2-2) - (modified) clang/test/CodeGenHLSL/builtins/ConstantBuffer.hlsl (+2-2) - (modified) clang/test/CodeGenHLSL/builtins/hlsl_resource_t.hlsl (+6-6) - (modified) clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl (+1-1) - (modified) clang/test/CodeGenObjC/pass-by-value-noalias.m (+2-2) - (modified) clang/test/CodeGenObjC/weak-in-c-struct.m (+3-3) - (modified) clang/test/CodeGenObjCXX/objc-struct-cxx-abi.mm (+9-9) - (modified) clang/test/CodeGenObjCXX/property-dot-copy-elision.mm (+3-3) - (modified) clang/test/CodeGenObjCXX/property-objects.mm (+2-2) - (modified) clang/test/CodeGenObjCXX/ptrauth-struct-cxx-abi.mm (+2-2) - (modified) clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp (+3-3) - (modified) clang/test/DebugInfo/CXX/debug-info.cpp (+1-1) - (modified) 
clang/test/DebugInfo/ObjC/nontrivial-c-struct-exception.m (+1-1) - (modified) clang/test/Headers/stdarg.cpp (+2-2) - (modified) clang/test/OpenMP/for_firstprivate_codegen.cpp (+25-25) - (modified) clang/test/OpenMP/parallel_firstprivate_codegen.cpp (+128-128) - (modified) clang/test/OpenMP/sections_firstprivate_codegen.cpp (+17-17) - (modified) clang/test/OpenMP/single_firstprivate_codegen.cpp (+17-17) - (modified) clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp (+20-20) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp (+56-56) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp (+56-56) - (modified) clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp (+20-20) - (modified) clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp (+20-20) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp (+28-28) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp (+28-28) - (modified) clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp (+20-20) - (modified) clang/test/OpenMP/teams_firstprivate_codegen.cpp (+20-20) ``````````diff diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 40cc275d40273..f4af28c447ce9 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -3061,13 +3061,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // // This is different from indirect *not* byval, where the object // exists already, and the align attribute is purely - // informative. + // informative (but is still useful for optimization passes). assert(!Align.isZero()); - - // For now, only add this when we have a byval argument. - // TODO: be less lazy about updating test cases. 
- if (AI.getIndirectByVal()) - Attrs.addAlignmentAttr(Align.getQuantity()); + Attrs.addAlignmentAttr(Align.getQuantity()); // byval disables readnone and readonly. AddPotentialArgAccess(); diff --git a/clang/test/CodeGen/64bit-swiftcall.c b/clang/test/CodeGen/64bit-swiftcall.c index 448bca7acbca3..66614f180e0ea 100644 --- a/clang/test/CodeGen/64bit-swiftcall.c +++ b/clang/test/CodeGen/64bit-swiftcall.c @@ -239,7 +239,7 @@ TEST(struct_big_1) // CHECK-LABEL: define {{.*}} void @return_struct_big_1(ptr dead_on_unwind noalias writable sret // Should not be byval. -// CHECK-LABEL: define {{.*}} void @take_struct_big_1(ptr dead_on_return{{( %.*)?}}) +// CHECK-LABEL: define {{.*}} void @take_struct_big_1(ptr align 8 dead_on_return{{( %.*)?}}) /*****************************************************************************/ /********************************* TYPE MERGING ******************************/ diff --git a/clang/test/CodeGen/AArch64/args.cpp b/clang/test/CodeGen/AArch64/args.cpp index e214d170f0423..3b5ea2d7f5526 100644 --- a/clang/test/CodeGen/AArch64/args.cpp +++ b/clang/test/CodeGen/AArch64/args.cpp @@ -34,7 +34,7 @@ EXTERNC int empty_align16_arg(struct EmptyAlign16 a, int b) { return b; } -// CXX: define{{.*}} i32 @empty_align32_arg(ptr noundef dead_on_return %a, i32 noundef %b) +// CXX: define{{.*}} i32 @empty_align32_arg(ptr noundef align 32 dead_on_return %a, i32 noundef %b) struct EmptyAlign32 { long long int __attribute__((aligned(32))) : 0; }; EXTERNC int empty_align32_arg(struct EmptyAlign32 a, int b) { return b; diff --git a/clang/test/CodeGen/AArch64/byval-temp.c b/clang/test/CodeGen/AArch64/byval-temp.c index fba81a23e32e6..823085ee5c9ef 100644 --- a/clang/test/CodeGen/AArch64/byval-temp.c +++ b/clang/test/CodeGen/AArch64/byval-temp.c @@ -30,10 +30,10 @@ void example(void) { // Then, memcpy `l` to the temporary stack space. 
// CHECK-O0-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %[[byvaltemp]], ptr align 8 %[[l]], i64 64, i1 false) // Finally, call using a pointer to the temporary stack space. -// CHECK-O0-NEXT: call void @pass_large(ptr noundef dead_on_return %[[byvaltemp]]) +// CHECK-O0-NEXT: call void @pass_large(ptr noundef align 8 dead_on_return %[[byvaltemp]]) // Now, do the same for the second call, using the second temporary alloca. // CHECK-O0-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %[[byvaltemp1]], ptr align 8 %[[l]], i64 64, i1 false) -// CHECK-O0-NEXT: call void @pass_large(ptr noundef dead_on_return %[[byvaltemp1]]) +// CHECK-O0-NEXT: call void @pass_large(ptr noundef align 8 dead_on_return %[[byvaltemp1]]) // CHECK-O0-NEXT: ret void // // At O3, we should have lifetime markers to help the optimizer re-use the temporary allocas. @@ -58,7 +58,7 @@ void example(void) { // Then, memcpy `l` to the temporary stack space. // CHECK-O3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %[[byvaltemp]], ptr align 8 %[[l]], i64 64, i1 false) // Finally, call using a pointer to the temporary stack space. -// CHECK-O3-NEXT: call void @pass_large(ptr noundef dead_on_return %[[byvaltemp]]) +// CHECK-O3-NEXT: call void @pass_large(ptr noundef align 8 dead_on_return %[[byvaltemp]]) // // The lifetime of the temporary used to pass a pointer to the struct ends here. // CHECK-O3-NEXT: call void @llvm.lifetime.end.p0(ptr %[[byvaltemp]]) @@ -66,7 +66,7 @@ void example(void) { // Now, do the same for the second call, using the second temporary alloca. 
// CHECK-O3-NEXT: call void @llvm.lifetime.start.p0(ptr %[[byvaltemp1]]) // CHECK-O3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %[[byvaltemp1]], ptr align 8 %[[l]], i64 64, i1 false) -// CHECK-O3-NEXT: call void @pass_large(ptr noundef dead_on_return %[[byvaltemp1]]) +// CHECK-O3-NEXT: call void @pass_large(ptr noundef align 8 dead_on_return %[[byvaltemp1]]) // CHECK-O3-NEXT: call void @llvm.lifetime.end.p0(ptr %[[byvaltemp1]]) // // Mark the end of the lifetime of `l`. @@ -88,12 +88,12 @@ void example_BitInt(void) { // CHECK-O0-NEXT: [[LOADEDV:%.*]] = trunc i256 [[TMP0]] to i129 // CHECK-O0-NEXT: [[STOREDV:%.*]] = sext i129 [[LOADEDV]] to i256 // CHECK-O0-NEXT: store i256 [[STOREDV]], ptr [[INDIRECT_ARG_TEMP]], align 16 -// CHECK-O0-NEXT: call void @pass_large_BitInt(ptr noundef dead_on_return [[INDIRECT_ARG_TEMP]]) +// CHECK-O0-NEXT: call void @pass_large_BitInt(ptr noundef align 16 dead_on_return [[INDIRECT_ARG_TEMP]]) // CHECK-O0-NEXT: [[TMP1:%.*]] = load i256, ptr [[L]], align 16 // CHECK-O0-NEXT: [[LOADEDV1:%.*]] = trunc i256 [[TMP1]] to i129 // CHECK-O0-NEXT: [[STOREDV1:%.*]] = sext i129 [[LOADEDV1]] to i256 // CHECK-O0-NEXT: store i256 [[STOREDV1]], ptr [[INDIRECT_ARG_TEMP1]], align 16 -// CHECK-O0-NEXT: call void @pass_large_BitInt(ptr noundef dead_on_return [[INDIRECT_ARG_TEMP1]]) +// CHECK-O0-NEXT: call void @pass_large_BitInt(ptr noundef align 16 dead_on_return [[INDIRECT_ARG_TEMP1]]) // CHECK-O0-NEXT: ret void // // CHECK-O3-LABEL: define dso_local void @example_BitInt( @@ -108,13 +108,13 @@ void example_BitInt(void) { // CHECK-O3-NEXT: call void @llvm.lifetime.start.p0(ptr [[INDIRECT_ARG_TEMP]]) // CHECK-O3-NEXT: [[STOREDV:%.*]] = sext i129 [[LOADEDV]] to i256 // CHECK-O3-NEXT: store i256 [[STOREDV]], ptr [[INDIRECT_ARG_TEMP]], align 16, !tbaa [[TBAA6]] -// CHECK-O3-NEXT: call void @pass_large_BitInt(ptr noundef dead_on_return [[INDIRECT_ARG_TEMP]]) +// CHECK-O3-NEXT: call void @pass_large_BitInt(ptr noundef align 16 dead_on_return 
[[INDIRECT_ARG_TEMP]]) // CHECK-O3-NEXT: call void @llvm.lifetime.end.p0(ptr [[INDIRECT_ARG_TEMP]]) // CHECK-O3-NEXT: [[TMP1:%.*]] = load i256, ptr [[L]], align 16, !tbaa [[TBAA6]] // CHECK-O3-NEXT: [[LOADEDV1:%.*]] = trunc i256 [[TMP1]] to i129 // CHECK-O3-NEXT: call void @llvm.lifetime.start.p0(ptr [[INDIRECT_ARG_TEMP1]]) // CHECK-O3-NEXT: [[STOREDV1:%.*]] = sext i129 [[LOADEDV1]] to i256 // CHECK-O3-NEXT: store i256 [[STOREDV1]], ptr [[INDIRECT_ARG_TEMP1]], align 16, !tbaa [[TBAA6]] -// CHECK-O3-NEXT: call void @pass_large_BitInt(ptr noundef dead_on_return [[INDIRECT_ARG_TEMP1]]) +// CHECK-O3-NEXT: call void @pass_large_BitInt(ptr noundef align 16 dead_on_return [[INDIRECT_ARG_TEMP1]]) // CHECK-O3-NEXT: call void @llvm.lifetime.end.p0(ptr [[INDIRECT_ARG_TEMP1]]) // CHECK-O3-NEXT: call void @llvm.lifetime.end.p0(ptr [[L]]) diff --git a/clang/test/CodeGen/AArch64/pure-scalable-args-empty-union.c b/clang/test/CodeGen/AArch64/pure-scalable-args-empty-union.c index 382a331f04f14..6868b0816450a 100644 --- a/clang/test/CodeGen/AArch64/pure-scalable-args-empty-union.c +++ b/clang/test/CodeGen/AArch64/pure-scalable-args-empty-union.c @@ -19,7 +19,7 @@ void f0(S0 *p) { use0(*p); } // CHECK-C: declare void @use0(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) -// CHECK-CXX: declare void @use0(ptr noundef dead_on_return) +// CHECK-CXX: declare void @use0(ptr noundef align 16 dead_on_return) #ifdef __cplusplus diff --git a/clang/test/CodeGen/AArch64/pure-scalable-args.c b/clang/test/CodeGen/AArch64/pure-scalable-args.c index 3380e3206e450..b2367b761ec7d 100644 --- a/clang/test/CodeGen/AArch64/pure-scalable-args.c +++ b/clang/test/CodeGen/AArch64/pure-scalable-args.c @@ -92,7 +92,7 @@ void test_argpass_simple(PST *p) { // CHECK-AAPCS-NEXT: ret void // CHECK-AAPCS: declare void @argpass_simple_callee(<vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>) -// 
CHECK-DARWIN: declare void @argpass_simple_callee(ptr noundef dead_on_return) +// CHECK-DARWIN: declare void @argpass_simple_callee(ptr noundef align 16 dead_on_return) // Boundary case of using the last available Z-reg, PST expanded. // 0.0 -> d0-d3 @@ -107,7 +107,7 @@ void test_argpass_last_z(PST *p) { argpass_last_z_callee(.0, .0, .0, .0, *p); } // CHECK-AAPCS: declare void @argpass_last_z_callee(double noundef, double noundef, double noundef, double noundef, <vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>) -// CHECK-DARWIN: declare void @argpass_last_z_callee(double noundef, double noundef, double noundef, double noundef, ptr noundef dead_on_return) +// CHECK-DARWIN: declare void @argpass_last_z_callee(double noundef, double noundef, double noundef, double noundef, ptr noundef align 16 dead_on_return) // Like the above, but using a tuple type to occupy some registers. @@ -123,7 +123,7 @@ void test_argpass_last_z_tuple(PST *p, svfloat64x4_t x) { argpass_last_z_tuple_callee(x, *p); } // CHECK-AAPCS: declare void @argpass_last_z_tuple_callee(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>) -// CHECK-DARWIN: declare void @argpass_last_z_tuple_callee(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, ptr noundef dead_on_return) +// CHECK-DARWIN: declare void @argpass_last_z_tuple_callee(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, ptr noundef align 16 dead_on_return) // Boundary case of using the last available P-reg, PST expanded. 
@@ -139,7 +139,7 @@ void test_argpass_last_p(PST *p) { argpass_last_p_callee(svpfalse(), svpfalse_c(), *p); } // CHECK-AAPCS: declare void @argpass_last_p_callee(<vscale x 16 x i1>, target("aarch64.svcount"), <vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>) -// CHECK-DARWIN: declare void @argpass_last_p_callee(<vscale x 16 x i1>, target("aarch64.svcount"), ptr noundef dead_on_return) +// CHECK-DARWIN: declare void @argpass_last_p_callee(<vscale x 16 x i1>, target("aarch64.svcount"), ptr noundef align 16 dead_on_return) // Not enough Z-regs, push PST to memory and pass a pointer, Z-regs and @@ -157,7 +157,7 @@ void test_argpass_no_z(PST *p, double dummy, svmfloat8_t u, int8x16_t v, mfloat8 void argpass_no_z_callee(svmfloat8_t, int8x16_t, mfloat8x16_t, double, double, int, PST, int, double, svbool_t); argpass_no_z_callee(u, v, w, .0, .0, 1, *p, 2, 3.0, svptrue_b64()); } -// CHECK: declare void @argpass_no_z_callee(<vscale x 16 x i8>, <16 x i8> noundef, <16 x i8>, double noundef, double noundef, i32 noundef, ptr noundef dead_on_return, i32 noundef, double noundef, <vscale x 16 x i1>) +// CHECK: declare void @argpass_no_z_callee(<vscale x 16 x i8>, <16 x i8> noundef, <16 x i8>, double noundef, double noundef, i32 noundef, ptr noundef align 16 dead_on_return, i32 noundef, double noundef, <vscale x 16 x i1>) // Like the above, using a tuple to occupy some registers. 
@@ -173,7 +173,7 @@ void test_argpass_no_z_tuple_f64(PST *p, float dummy, svfloat64x4_t x) { double, svbool_t); argpass_no_z_tuple_f64_callee(x, .0, 1, *p, 2, 3.0, svptrue_b64()); } -// CHECK: declare void @argpass_no_z_tuple_f64_callee(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, double noundef, i32 noundef, ptr noundef dead_on_return, i32 noundef, double noundef, <vscale x 16 x i1>) +// CHECK: declare void @argpass_no_z_tuple_f64_callee(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, double noundef, i32 noundef, ptr noundef align 16 dead_on_return, i32 noundef, double noundef, <vscale x 16 x i1>) // Likewise, using a different tuple. @@ -189,7 +189,7 @@ void test_argpass_no_z_tuple_mfp8(PST *p, float dummy, svmfloat8x4_t x) { double, svbool_t); argpass_no_z_tuple_mfp8_callee(x, .0, 1, *p, 2, 3.0, svptrue_b64()); } -// CHECK: declare void @argpass_no_z_tuple_mfp8_callee(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, double noundef, i32 noundef, ptr noundef dead_on_return, i32 noundef, double noundef, <vscale x 16 x i1>) +// CHECK: declare void @argpass_no_z_tuple_mfp8_callee(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, double noundef, i32 noundef, ptr noundef align 16 dead_on_return, i32 noundef, double noundef, <vscale x 16 x i1>) // Not enough Z-regs (consumed by a HFA), PST passed indirectly @@ -204,8 +204,8 @@ void test_argpass_no_z_hfa(HFA *h, PST *p) { void argpass_no_z_hfa_callee(double, HFA, int, PST, int, svbool_t); argpass_no_z_hfa_callee(.0, *h, 1, *p, 2, svptrue_b64()); } -// CHECK-AAPCS: declare void @argpass_no_z_hfa_callee(double noundef, [4 x float] alignstack(8), i32 noundef, ptr noundef dead_on_return, i32 noundef, <vscale x 16 x i1>) -// CHECK-DARWIN: declare void @argpass_no_z_hfa_callee(double noundef, [4 x float], i32 noundef, ptr noundef dead_on_return, i32 noundef, <vscale x 16 x i1>) 
+// CHECK-AAPCS: declare void @argpass_no_z_hfa_callee(double noundef, [4 x float] alignstack(8), i32 noundef, ptr noundef align 16 dead_on_return, i32 noundef, <vscale x 16 x i1>) +// CHECK-DARWIN: declare void @argpass_no_z_hfa_callee(double noundef, [4 x float], i32 noundef, ptr noundef align 16 dead_on_return, i32 noundef, <vscale x 16 x i1>) // Not enough Z-regs (consumed by a HVA), PST passed indirectly // 0.0 -> d0 @@ -219,8 +219,8 @@ void test_argpass_no_z_hva(HVA *h, PST *p) { void argpass_no_z_hva_callee(double, HVA, int, PST, int, svbool_t); argpass_no_z_hva_callee(.0, *h, 1, *p, 2, svptrue_b64()); } -// CHECK-AAPCS: declare void @argpass_no_z_hva_callee(double noundef, [4 x <16 x i8>] alignstack(16), i32 noundef, ptr noundef dead_on_return, i32 noundef, <vscale x 16 x i1>) -// CHECK-DARWIN: declare void @argpass_no_z_hva_callee(double noundef, [4 x <16 x i8>], i32 noundef, ptr noundef dead_on_return, i32 noundef, <vscale x 16 x i1>) +// CHECK-AAPCS: declare void @argpass_no_z_hva_callee(double noundef, [4 x <16 x i8>] alignstack(16), i32 noundef, ptr noundef align 16 dead_on_return, i32 noundef, <vscale x 16 x i1>) +// CHECK-DARWIN: declare void @argpass_no_z_hva_callee(double noundef, [4 x <16 x i8>], i32 noundef, ptr noundef align 16 dead_on_return, i32 noundef, <vscale x 16 x i1>) // Not enough P-regs, PST passed indirectly, Z-regs and P-regs still available. 
// true -> p0-p2 @@ -233,7 +233,7 @@ void test_argpass_no_p(PST *p) { void argpass_no_p_callee(svbool_t, svbool_t, svbool_t, int, PST, int, double, svbool_t); argpass_no_p_callee(svptrue_b8(), svptrue_b16(), svptrue_b32(), 1, *p, 2, 3.0, svptrue_b64()); } -// CHECK: declare void @argpass_no_p_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32 noundef, ptr noundef dead_on_return, i32 noundef, double noundef, <vscale x 16 x i1>) +// CHECK: declare void @argpass_no_p_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32 noundef, ptr noundef align 16 dead_on_return, i32 noundef, double noundef, <vscale x 16 x i1>) // Like above, using a tuple to occupy some registers. @@ -250,7 +250,7 @@ void test_argpass_no_p_tuple(PST *p, svbool_t u, svboolx2_t v) { svbool_t); argpass_no_p_tuple_callee(v, u, 1, *p, 2, 3.0, svptrue_b64()); } -// CHECK: declare void @argpass_no_p_tuple_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32 noundef, ptr noundef dead_on_return, i32 noundef, double noundef, <vscale x 16 x i1>) +// CHECK: declare void @argpass_no_p_tuple_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32 noundef, ptr noundef align 16 dead_on_return, i32 noundef, double noundef, <vscale x 16 x i1>) // HFAs go back-to-back to memory, afterwards Z-regs not available, PST passed indirectly. 
@@ -263,8 +263,8 @@ void test_after_hfa(HFA *h, PST *p) { void after_hfa_callee(double, double, double, double, double, HFA, PST, HFA, svbool_t); after_hfa_callee(.0, .0, .0, .0, .0, *h, *p, *h, svpfalse()); } -// CHECK-AAPCS: declare void @after_hfa_callee(double noundef, double noundef, double noundef, double noundef, double noundef, [4 x float] alignstack(8), ptr noundef dead_on_return, [4 x float] alignstack(8), <vscale x 16 x i1>) -// CHECK-DARWIN: declare void @after_hfa_callee(double noundef, double noundef, double noundef, double noundef, double noundef, [4 x float], ptr noundef dead_on_return, [4 x float], <vscale x 16 x i1>) +// CHECK-AAPCS: declare void @after_hfa_callee(double noundef, double noundef, double noundef, double noundef, double noundef, [4 x float] alignstack(8), ptr noundef align 16 dead_on_return, [4 x float] alignstack(8), <vscale x 16 x i1>) +// CHECK-DARWIN: declare void @after_hfa_callee(double noundef, double noundef, double noundef, double noundef, double noundef, [4 x float], ptr noundef align 16 dead_on_return, [4 x float], <vscale x 16 x i1>) // Small PST, not enough registers, passed indirectly, unlike other small // aggregates. 
@@ -277,7 +277,7 @@ void test_small_pst(SmallPST *p, SmallAgg *s) { void small_pst_callee(SmallAgg, double, double, double, double, double, double, double, double, double, SmallPST, double); small_pst_callee(*s, .0, .0, .0, .0, .0, .0, .0, .0, 1.0, *p, 2.0); } -// CHECK-AAPCS: declare void @small_pst_callee([2 x i64], double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, ptr noundef dead_on_return, double noundef) +// CHECK-AAPCS: declare void @small_pst_callee([2 x i64], double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, ptr noundef align 16 dead_on_return, double noundef) // CHECK-DARWIN: declare void @small_pst_callee([2 x i64], double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, i128, double noundef) @@ -326,12 +326,12 @@ void test_pass_variadic(PST *p, PST *q) { pass_variadic_callee(*p, *q); } // CHECK-AAPCS: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp, ptr noundef nonnull align 16 dereferenceable(96) %q, i64 96, i1 false) -// CHECK-AAPCS: call void (<vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>, ...) @pass_variadic_callee(<vscale x 16 x i1> %1, <vscale x 2 x double> %cast.scalable1, <vscale x 4 x float> %cast.scalable2, <vscale x 4 x float> %cast.scalable3, <vscale x 16 x i8> %cast.scalable4, <vscale x 16 x i1> %12, ptr noundef nonnull dead_on_return %byval-temp) +// CHECK-AAPCS: call void (<vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>, ...) 
@pass_variadic_callee(<vscale x 16 x i1> %1, <vscale x 2 x double> %cast.scalable1, <vscale x 4 x float> %cast.scalable2, <vscale x 4 x float> %cast.scalable3, <vscale x 16 x i8> %cast.scalable4, <vscale x 16 x i1> %12, ptr noundef nonnull align 16 dead_on_return %byval-temp) // CHECK-DARWIN: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp, ptr noundef nonnull align 16 dereferenceable(96) %p, i64 96, i1 false) // CHECK-DARWIN: call void @llvm.lifetime.start.p0(ptr nonnull %byval-temp1) // CHECK-DARWIN: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp1, ptr noundef nonnull align 16 dereferenceable(96) %q, i64 96, i1 false) -// CHECK-DARWIN: call void (ptr, ...) @pass_variadic_callee(ptr noundef nonnull dead_on_return %byval-temp, ptr noundef nonnull dead_... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/201999 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
