The problem here is that on a normal return path, we still restore the eh data return when we should not. Instead of one return path in the case of eh_return, this changes over to use multiple returns pathes just like a normal function. On the normal path (non-eh return), we need to skip restoring of the eh return data registers.
This fixes the code generation of _Unwind_RaiseException where the return value was currupted. Note this adds some testcases which might fail on some targets. I know of the following targets will fail also: arm is recorded as PR 114847. powerpc is recorded as PR 114846. Build and tested for aarch64-linux-gnu with no regressions. Changes in: * v2: Fix logical error in aarch64_pop_regs which was a premature optimization. Check regno1 and regno2 independently now. Also add eh_return-5.c which tests that case. * v3: Instead of redoing the detection of the eh_return register store off to frame.eh_return_allocated. Also don't consider eh_return data registers as pop canidates. Note v2 was not submitted. PR target/114843 gcc/ChangeLog: * config/aarch64/aarch64-protos.h (aarch64_expand_epilogue): New prototype. * config/aarch64/aarch64.h (EH_RETURN_DATA_REGISTERS_N): New define. (EH_RETURN_DATA_REGNO): Use EH_RETURN_DATA_REGISTERS_N instead of hard coding 4. (aarch64_frame): Add eh_return_allocated. * config/aarch64/aarch64.cc (aarch64_restore_callee_saves): Skip over the eh return data regs if not eh return. (aarch64_expand_epilogue): New function, pass false. (aarch64_expand_epilogue): Add was_eh_return argument. Update calls to aarch64_restore_callee_saves and aarch64_pop_regs. For eh_returns, update the sp and do an indirect jump. Don't check EH_RETURN_TAKEN_RTX any more. * config/aarch64/aarch64.h (EH_RETURN_TAKEN_RTX): Delete. * config/aarch64/aarch64.md (eh_return): New define_expand. (eh_return_internal): New pattern for eh_returns. gcc/testsuite/ChangeLog: * gcc.target/aarch64/eh_return-3.c: Update testcase. * gcc.c-torture/execute/eh_return-1.c: New test. * gcc.c-torture/execute/eh_return-2.c: New test. * gcc.c-torture/execute/eh_return-3.c: New test. * gcc.c-torture/execute/eh_return-4.c: New test. * gcc.c-torture/execute/eh_return-5.c: New test. * gcc.target/aarch64/eh_return-4.c: New test. Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> --- gcc/config/aarch64/aarch64-protos.h | 1 + gcc/config/aarch64/aarch64.cc | 78 ++++++++++++++----- gcc/config/aarch64/aarch64.h | 14 ++-- gcc/config/aarch64/aarch64.md | 24 ++++++ .../gcc.c-torture/execute/eh_return-1.c | 20 +++++ .../gcc.c-torture/execute/eh_return-2.c | 23 ++++++ .../gcc.c-torture/execute/eh_return-3.c | 25 ++++++ .../gcc.c-torture/execute/eh_return-4.c | 25 ++++++ .../gcc.c-torture/execute/eh_return-5.c | 24 ++++++ .../gcc.target/aarch64/eh_return-3.c | 12 ++- .../gcc.target/aarch64/eh_return-4.c | 32 ++++++++ 11 files changed, 244 insertions(+), 34 deletions(-) create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-1.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-2.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-3.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-4.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-5.c create mode 100644 gcc/testsuite/gcc.target/aarch64/eh_return-4.c diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 42639e9efcf..efe86d52873 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -904,6 +904,7 @@ const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *); const char * aarch64_output_probe_stack_range (rtx, rtx); const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx); void aarch64_err_no_fpadvsimd (machine_mode); +void aarch64_expand_epilogue (rtx_call_insn *, bool); void aarch64_expand_epilogue (rtx_call_insn *); rtx aarch64_ptrue_all (unsigned int); opt_machine_mode aarch64_ptrue_all_mode (rtx); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 662ff5a9b0c..afbe4eeb340 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -7792,6 +7792,7 @@ aarch64_layout_frame (void) #define SLOT_NOT_REQUIRED (-2) #define SLOT_REQUIRED (-1) +#define SLOT_EH_RETURN_REQUIRED (-3) frame.wb_push_candidate1 = INVALID_REGNUM; frame.wb_push_candidate2 = INVALID_REGNUM; @@ -7805,7 +7806,7 @@ aarch64_layout_frame (void) /* ... that includes the eh data registers (if needed)... */ if (crtl->calls_eh_return) for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++) - frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = SLOT_REQUIRED; + frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = SLOT_EH_RETURN_REQUIRED; /* ... and any callee saved register that dataflow says is live. */ for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) @@ -7949,6 +7950,18 @@ aarch64_layout_frame (void) stopping it from being individually shrink-wrapped. */ allocate_gpr_slot (R30_REGNUM); + /* Allocate the eh_return first. */ + if (crtl->calls_eh_return) + for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++) + { + int realregno = EH_RETURN_DATA_REGNO (regno); + if (known_eq (frame.reg_offset[realregno], SLOT_EH_RETURN_REQUIRED)) + { + frame.eh_return_allocated[regno] = true; + allocate_gpr_slot (realregno); + } + } + for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) allocate_gpr_slot (regno); @@ -8035,6 +8048,23 @@ aarch64_layout_frame (void) frame.wb_pop_candidate1 = frame.wb_push_candidate1; frame.wb_pop_candidate2 = frame.wb_push_candidate2; + /* EH data registers are not pop canidates. */ + if (crtl->calls_eh_return) + for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++) + { + if (frame.eh_return_allocated[regno] + && frame.wb_pop_candidate1 == EH_RETURN_DATA_REGNO (regno)) + { + frame.wb_pop_candidate1 = frame.wb_pop_candidate2; + frame.wb_pop_candidate2 = INVALID_REGNUM; + } + if (frame.eh_return_allocated[regno] + && frame.wb_pop_candidate2 == EH_RETURN_DATA_REGNO (regno)) + { + frame.wb_pop_candidate2 = INVALID_REGNUM; + } + } + /* Shadow call stack only deals with functions where the LR is pushed onto the stack and without specifying the "no_sanitize" attribute with the argument "shadow-call-stack". */ @@ -8662,7 +8692,8 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, static void aarch64_restore_callee_saves (poly_int64 bytes_below_sp, - array_slice<unsigned int> regs, rtx *cfi_ops) + array_slice<unsigned int> regs, rtx *cfi_ops, + bool was_eh_return = false) { aarch64_frame &frame = cfun->machine->frame; poly_int64 offset; @@ -8681,6 +8712,20 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, if (frame.is_scs_enabled && regno == LR_REGNUM) return true; + /* Skip the eh return data registers if we are + returning normally rather than via eh_return. */ + if (!was_eh_return && crtl->calls_eh_return) + { + for (unsigned ehregno = 0; + EH_RETURN_DATA_REGNO (ehregno) != INVALID_REGNUM; + ehregno++) + { + if (EH_RETURN_DATA_REGNO (ehregno) == regno + && frame.eh_return_allocated[ehregno]) + return true; + } + } + return false; }; @@ -9805,6 +9850,11 @@ aarch64_use_return_insn_p (void) return known_eq (cfun->machine->frame.frame_size, 0); } +void +aarch64_expand_epilogue (rtx_call_insn *sibcall) +{ + aarch64_expand_epilogue (sibcall, false); +} /* Generate the epilogue instructions for returning from a function. This is almost exactly the reverse of the prolog sequence, except @@ -9812,7 +9862,7 @@ aarch64_use_return_insn_p (void) from a deallocated stack, and we optimize the unwind records by emitting them all together if possible. */ void -aarch64_expand_epilogue (rtx_call_insn *sibcall) +aarch64_expand_epilogue (rtx_call_insn *sibcall, bool was_eh_return) { aarch64_frame &frame = cfun->machine->frame; poly_int64 initial_adjust = frame.initial_adjust; @@ -9919,7 +9969,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) restore x30, we don't need to restore x30 again in the traditional way. */ aarch64_restore_callee_saves (final_adjust + sve_callee_adjust, - frame.saved_gprs, &cfi_ops); + frame.saved_gprs, &cfi_ops, was_eh_return); if (need_barrier_p) aarch64_emit_stack_tie (stack_pointer_rtx); @@ -9968,26 +10018,12 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) } /* Stack adjustment for exception handler. */ - if (crtl->calls_eh_return && !sibcall) - { - /* If the EH_RETURN_TAKEN_RTX flag is set then we need - to unwind the stack and jump to the handler, otherwise - skip this eh_return logic and continue with normal - return after the label. We have already reset the CFA - to be SP; letting the CFA move during this adjustment - is just as correct as retaining the CFA from the body - of the function. Therefore, do nothing special. */ - rtx_code_label *label = gen_label_rtx (); - rtx x = aarch64_gen_compare_zero_and_branch (EQ, EH_RETURN_TAKEN_RTX, - label); - rtx jump = emit_jump_insn (x); - JUMP_LABEL (jump) = label; - LABEL_NUSES (label)++; + if (was_eh_return && !sibcall) + { emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX)); emit_jump_insn (gen_indirect_jump (EH_RETURN_HANDLER_RTX)); - emit_barrier (); - emit_label (label); + return; } /* We prefer to emit the combined return/authenticate instruction RETAA, diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 4fa1dfc7906..bfcd5e84510 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -700,8 +700,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; #define DWARF2_UNWIND_INFO 1 /* Use R0 through R3 to pass exception handling information. */ +#define EH_RETURN_DATA_REGISTERS_N 4 #define EH_RETURN_DATA_REGNO(N) \ - ((N) < 4 ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM) + ((N) < EH_RETURN_DATA_REGISTERS_N ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM) /* Select a format to encode pointers in exception handling data. */ #define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ @@ -723,10 +724,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; /* Output assembly strings after .cfi_startproc is emitted. */ #define ASM_POST_CFI_STARTPROC aarch64_post_cfi_startproc -/* For EH returns X4 is a flag that is set in the EH return - code paths and then X5 and X6 contain the stack adjustment +/* For EH returns X5 and X6 contain the stack adjustment and return address respectively. */ -#define EH_RETURN_TAKEN_RTX gen_rtx_REG (Pmode, R4_REGNUM) #define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R5_REGNUM) #define EH_RETURN_HANDLER_RTX gen_rtx_REG (Pmode, R6_REGNUM) @@ -929,6 +928,7 @@ struct GTY (()) aarch64_frame outgoing arguments) of each register save slot, or -2 if no save is needed. */ poly_int64 reg_offset[LAST_SAVED_REGNUM + 1]; + bool eh_return_allocated[EH_RETURN_DATA_REGISTERS_N]; /* The list of GPRs, FPRs and predicate registers that have nonnegative entries in reg_offset. The registers are listed in order of @@ -1015,10 +1015,12 @@ struct GTY (()) aarch64_frame eventually. The initial value of a pop candidate is copied from its corresponding push candidate. - Currently, different pop candidates are only used for shadow call + Different pop candidates are used for shadow call stack. When "-fsanitize=shadow-call-stack" is specified, we replace x30 in the pop candidate with INVALID_REGNUM to ensure that x30 is - not popped twice. */ + not popped twice. + + Also eh_return data registers are not pop candidates. */ unsigned wb_push_candidate1; unsigned wb_push_candidate2; unsigned wb_pop_candidate1; diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 046a249475d..a6cedd0f1b8 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1058,6 +1058,30 @@ (define_insn "simple_return" (set_attr "sls_length" "retbr")] ) +;; This is used in compiling the unwind routines. +(define_expand "eh_return" + [(use (match_operand 0 "general_operand"))] + "" +{ + emit_move_insn (EH_RETURN_HANDLER_RTX, operands[0]); + + emit_jump_insn (gen_eh_return_internal ()); + emit_barrier (); + DONE; +}) + +(define_insn_and_split "eh_return_internal" + [(eh_return)] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + aarch64_expand_epilogue (nullptr, true); + DONE; +}) + + (define_insn "aarch64_cb<optab><mode>1" [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") (const_int 0)) diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-1.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-1.c new file mode 100644 index 00000000000..3928a58d841 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-1.c @@ -0,0 +1,20 @@ +/* { dg-do run } */ +/* { dg-require-effective-target builtin_eh_return } */ + +/* PR target/114843 */ +/* Test to make sure the return value does not get clobbered. */ +__attribute__((noipa)) +int f(int *a, long offset, void *handler) +{ + if (*a == 5) + return 5; + __builtin_eh_return (offset, handler); +} + +int main() +{ + int t = 5; + if (f(&t, 0, 0) != 5) + __builtin_abort(); +} + diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-2.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-2.c new file mode 100644 index 00000000000..41fb48c99f9 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-2.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-require-effective-target builtin_eh_return } */ + +/* PR target/114843 */ +/* Test to make sure the return value does not get clobbered, also use alloca. */ +__attribute__((noipa)) +void g(void*) {} +__attribute__((noipa)) +int f(int *a, long offset, void *handler) +{ + g(__builtin_alloca(offset)); + if (*a == 5) + return 5; + __builtin_eh_return (offset, handler); +} + +int main() +{ + int t = 5; + if (f(&t, 67, 0) != 5) + __builtin_abort(); +} + diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-3.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-3.c new file mode 100644 index 00000000000..dfd73d35a43 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-3.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-require-effective-target builtin_eh_return } */ + + +/* PR target/114843 */ +/* Test to make sure the return value does not get clobbered, also use large stack. */ +__attribute__((noipa)) +void g(void *a) {} +__attribute__((noipa)) +int f(int *a, long offset, void *handler) +{ + int h[10245]; + g(h); + if (*a == 5) + return 5; + __builtin_eh_return (offset, handler); +} + +int main() +{ + int t = 5; + if (f(&t, 67, 0) != 5) + __builtin_abort(); +} + diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-4.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-4.c new file mode 100644 index 00000000000..2f1126ba7c5 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-4.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-require-effective-target builtin_eh_return } */ + +/* PR target/114843 */ +/* Test to make sure the return value does not get clobbered, using sibcall. */ +__attribute__((noipa)) +int g() +{ + return 5; +} +__attribute__((noipa)) +int f(int *a, long offset, void *handler) +{ + if (*a == 5) + return g(); + __builtin_eh_return (offset, handler); +} + +int main() +{ + int t = 5; + if (f(&t, 67, 0) != 5) + __builtin_abort(); +} + diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-5.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-5.c new file mode 100644 index 00000000000..50b1b181b09 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-5.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ +/* { dg-require-effective-target builtin_eh_return } */ + +/* PR target/114843 */ +/* Test to make sure the return value does not get clobbered. */ +__attribute__((noipa)) +void g(void){} +int arr[6]={0,1,2,3,4,5}; +__attribute__((noipa)) +int f(long *offset, void *handler) +{ + g(); + if (*offset < 5) + return arr[*offset]; + __builtin_eh_return (*offset, handler); +} + +int main() +{ + long o = 1; + if (f(&o, 0) != 1) + __builtin_abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return-3.c b/gcc/testsuite/gcc.target/aarch64/eh_return-3.c index d180fa7c455..2ed9b53b3f1 100644 --- a/gcc/testsuite/gcc.target/aarch64/eh_return-3.c +++ b/gcc/testsuite/gcc.target/aarch64/eh_return-3.c @@ -7,11 +7,7 @@ ** hint 25 // paciasp ** ... ** cbz w2, .* -** mov x4, 0 -** ... -** cbz x4, .* -** add sp, sp, x5 -** br x6 +** add sp, sp, 32 ** ( ** hint 29 // autiasp ** ret @@ -19,9 +15,11 @@ ** retaa ** ) ** mov x5, x0 -** mov x4, 1 ** mov x6, x1 -** b .* +** ... +** add sp, sp, 32 +** add sp, sp, x5 +** br x6 */ void foo (unsigned long off, void *handler, int c) diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return-4.c b/gcc/testsuite/gcc.target/aarch64/eh_return-4.c new file mode 100644 index 00000000000..7c58765dab2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/eh_return-4.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbranch-protection=pac-ret+leaf -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +**foo: +** hint 25 // paciasp +** sub sp, sp, #32 +** ... +** mov x5, x0 +** cbz w2, .* +** mov w0, 5 +** add sp, sp, 32 +** ( +** hint 29 // autiasp +** ret +** | +** retaa +** ) +** mov x6, x1 +** ... +** add sp, sp, 32 +** add sp, sp, x5 +** br x6 +*/ +int +foo (unsigned long off, void *handler, int c) +{ + if (c) + return 5; + __builtin_eh_return (off, handler); +} -- 2.43.0