pratlucas created this revision. Herald added subscribers: llvm-commits, cfe-commits, danielkiss, hiraditya, kristof.beyls. Herald added projects: clang, LLVM. pratlucas added a parent revision: D75903: [AArch64][CodeGen] Fixing stack alignment of HFA arguments on AArch64 PCS. pratlucas added reviewers: t.p.northover, olista01, rnk, asl.
This patch properly complies with the AArch32 PCS when handling over-aligned HFA arguments that are placed on the stack. AAPCS specifies that the stacked argument address should be adjusted upwards until correctly aligned for the argument before copying it to memory. This patch fixes the alignment of these arguments by making use of the stack alignment propagated through the `alignstack` IR argument attribute during the calling convention lowering for ARM targets. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D75904 Files: clang/lib/CodeGen/TargetInfo.cpp clang/test/CodeGen/arm-aapcs-vfp.c llvm/lib/Target/ARM/ARMCallingConv.cpp llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
Index: llvm/test/CodeGen/ARM/aapcs-hfa-code.ll =================================================================== --- llvm/test/CodeGen/ARM/aapcs-hfa-code.ll +++ llvm/test/CodeGen/ARM/aapcs-hfa-code.ll @@ -3,6 +3,8 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +%struct.hfa_align = type { [2 x float] } + define arm_aapcs_vfpcc void @test_1float({ float } %a) { call arm_aapcs_vfpcc void @test_1float({ float } { float 1.0 }) ret void @@ -104,3 +106,73 @@ ret void } + +; Over-aligned HFA argument placed on register - one element per register +define arm_aapcs_vfpcc float @test_hfa_align_reg(%struct.hfa_align alignstack(8) %h1.coerce) local_unnamed_addr #3 { +entry: +; CHECK-LABEL: test_hfa_align_reg: +; CHECK-DAG: bx lr + +; CHECK-M4F-LABEL: test_hfa_align_reg: +; CHECK-M4F-DAG: bx lr + + %h1.coerce.fca.0.0.extract = extractvalue %struct.hfa_align %h1.coerce, 0, 0 + ret float %h1.coerce.fca.0.0.extract +} + +; Call with over-align HFA argument placed on registers - one element per register +define arm_aapcs_vfpcc float @test_hfa_align_reg_call() local_unnamed_addr #3 { +entry: +; CHECK-LABEL: test_hfa_align_reg_call: +; CHECK-DAG: vmov.f32 s0, #1.000000e+00 +; CHECK-DAG: vmov.f32 s1, #2.000000e+00 +; CHECK-DAG: bl test_hfa_align_reg + +; CHECK-M4F-LABEL: test_hfa_align_reg_call: +; CHECK-M4F-DAG: vmov.f32 s0, #1.000000e+00 +; CHECK-M4F-DAG: vmov.f32 s1, #2.000000e+00 +; CHECK-M4F-DAG: bl test_hfa_align_reg + + %call = call arm_aapcs_vfpcc float @test_hfa_align_reg(%struct.hfa_align alignstack(8) { [2 x float] [float 1.000000e+00, float 2.000000e+00] }) #5 + ret float %call +} + +; Over-aligned HFA argument placed on the stack - stack round up to alignment +define arm_aapcs_vfpcc float @test_hfa_align_stack(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %f1, %struct.hfa_align alignstack(8) %h1.coerce) local_unnamed_addr #3 { +entry: +; CHECK-LABEL: test_hfa_align_stack: +; CHECK-DAG: vldr s0, 
[sp, #8] +; CHECK-DAG: bx lr + +; CHECK-M4F-LABEL: test_hfa_align_stack: +; CHECK-M4F-DAG: vldr s0, [sp, #8] +; CHECK-M4F-DAG: bx lr + + %h1.coerce.fca.0.0.extract = extractvalue %struct.hfa_align %h1.coerce, 0, 0 + ret float %h1.coerce.fca.0.0.extract +} + +; Call with over-aligned HFA argument placed on the stack - stack round up to alignment +define arm_aapcs_vfpcc float @test_hfa_align_stack_call() local_unnamed_addr #3 { +entry: +; CHECK-LABEL: test_hfa_align_stack_call: +; CHECK-DAG: sub sp, sp, #16 +; CHECK-DAG: mov r0, #1073741824 +; CHECK-DAG: mov r1, #1065353216 +; CHECK-DAG: str r1, [sp, #8] +; CHECK-DAG: str r0, [sp, #12] +; CHECK-DAG: bl test_hfa_align_stack +; CHECK-DAG: add sp, sp, #16 + +; CHECK-M4F-LABEL: test_hfa_align_stack_call: +; CHECK-M4F-DAG: sub sp, #16 +; CHECK-M4F-DAG: mov.w r0, #1073741824 +; CHECK-M4F-DAG: mov.w r1, #1065353216 +; CHECK-M4F-DAG: strd r1, r0, [sp, #8] +; CHECK-M4F-DAG: bl test_hfa_align_stack +; CHECK-M4F-DAG: add sp, #16 + + %call = call arm_aapcs_vfpcc float @test_hfa_align_stack(double undef, double undef, double undef, double undef, double undef, double undef, double undef, double undef, float undef, %struct.hfa_align alignstack(8) { [2 x float] [float 1.000000e+00, float 2.000000e+00] }) #5 + ret float %call +} + Index: llvm/lib/Target/ARM/ARMCallingConv.cpp =================================================================== --- llvm/lib/Target/ARM/ARMCallingConv.cpp +++ llvm/lib/Target/ARM/ARMCallingConv.cpp @@ -266,7 +266,10 @@ // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll // be allocating a bunch of i32 slots). 
unsigned RestAlign = std::min(Align, Size); - + if (ArgFlags.getStackAlign()) { + const llvm::Align ArgStackAlign(ArgFlags.getStackAlign()); + Align = std::max(Align, unsigned(ArgStackAlign.value())); + } for (auto &It : PendingMembers) { It.convertToMem(State.AllocateStack(Size, Align)); State.addLoc(It); Index: clang/test/CodeGen/arm-aapcs-vfp.c =================================================================== --- clang/test/CodeGen/arm-aapcs-vfp.c +++ clang/test/CodeGen/arm-aapcs-vfp.c @@ -147,3 +147,17 @@ // is passed ByVal (due to being > 64 bytes), so the backend handles this instead. void test_vfp_stack_gpr_split_6(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_seventeen_ints k) {} // CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_6(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, %struct.struct_seventeen_ints* byval(%struct.struct_seventeen_ints) align 4 %k) + +// Make sure over-alignment information is propagated to the backend properly +typedef struct { + __attribute__((__aligned__(8))) float v[2]; +} hfa_align; +// CHECK: define arm_aapcs_vfpcc float @test_hfa_align_arg(%struct.hfa_align alignstack(8) %h1.coerce) #0 +float test_hfa_align_arg(hfa_align h1) { + return h1.v[0]; +} +// CHECK: %call = call arm_aapcs_vfpcc float @test_hfa_align_arg(%struct.hfa_align alignstack(8) %1) #4 +float test_hfa_align_call() { + hfa_align h = {1.0, 2.0}; + return test_hfa_align_arg(h); +} Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -5934,7 +5934,10 @@ return ABIArgInfo::getDirect(Ty, 0, nullptr, false); } } - return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); + bool NeedsStackAlignment = getContext().getTypeAlignInChars(Ty) != + getContext().getTypeAlignInChars(Base); + return 
ABIArgInfo::getDirect(nullptr, /*Offset=*/0, /*Padding=*/nullptr, + /*CanBeFlattened=*/false, NeedsStackAlignment); } ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, @@ -6000,9 +6003,13 @@ uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { assert(Base && Members <= 4 && "unexpected homogeneous aggregate"); + bool NeedsStackAlignment = getContext().getTypeAlignInChars(Ty) != + getContext().getTypeAlignInChars(Base); llvm::Type *Ty = llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members); - return ABIArgInfo::getDirect(Ty, 0, nullptr, false); + return ABIArgInfo::getDirect(Ty, /*Offset=*/0, /*Padding=*/nullptr, + /*CanBeFlattened=*/false, + NeedsStackAlignment); } }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits