Adhemerval Zanella writes: >> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c >> index e56398a..2cf239f 100644 >> --- a/gcc/config/aarch64/aarch64.c >> +++ b/gcc/config/aarch64/aarch64.c >> @@ -3227,6 +3227,34 @@ aarch64_expand_prologue (void) >> RTX_FRAME_RELATED_P (insn) = 1; >> } >> } >> + >> + if (flag_split_stack && offset) >> + { >> + /* Setup the argument pointer (x10) for -fsplit-stack code. If >> + __morestack was called, it will left the arg pointer to the >> + old stack in x28. Otherwise, the argument pointer is the top >> + of current frame. */ >> + rtx x10 = gen_rtx_REG (Pmode, R10_REGNUM); >> + rtx x11 = gen_rtx_REG (Pmode, R11_REGNUM); >> + rtx x28 = gen_rtx_REG (Pmode, R28_REGNUM); >> + rtx x29 = gen_rtx_REG (Pmode, R29_REGNUM); >> + rtx not_more = gen_label_rtx (); >> + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); >> + rtx jump; >> + >> + emit_move_insn (x11, GEN_INT (hard_fp_offset)); >> + emit_insn (gen_add3_insn (x10, x29, x11)); >> + jump = gen_rtx_IF_THEN_ELSE (VOIDmode, >> + gen_rtx_GEU (VOIDmode, cc_reg, >> + const0_rtx), >> + gen_rtx_LABEL_REF (VOIDmode, not_more), >> + pc_rtx); >> + jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump)); >> + JUMP_LABEL (jump) = not_more; >> + LABEL_NUSES (not_more) += 1; >> + emit_move_insn (x10, x28); >> + emit_label (not_more); >> + } >> }
This part needs a rebase; there have been major changes to the AArch64 prologue code recently. >> >> /* Return TRUE if we can use a simple_return insn. >> @@ -3303,6 +3331,7 @@ aarch64_expand_epilogue (bool for_sibcall) >> offset = offset - fp_offset; >> } >> >> + Unnecessary new line. >> if (offset > 0) >> { >> unsigned reg1 = cfun->machine->frame.wb_candidate1; >> @@ -9648,7 +9677,7 @@ aarch64_expand_builtin_va_start (tree valist, rtx >> nextarg ATTRIBUTE_UNUSED) >> /* Emit code to initialize STACK, which points to the next varargs stack >> argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used >> by named arguments. STACK is 8-byte aligned. */ >> - t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx); >> + t = make_tree (TREE_TYPE (stack), crtl->args.internal_arg_pointer); >> if (cum->aapcs_stack_size > 0) >> t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * >> UNITS_PER_WORD); >> t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t); >> @@ -14010,6 +14039,196 @@ aarch64_optab_supported_p (int op, machine_mode >> mode1, machine_mode, >> } >> } >> >> +/* -fsplit-stack support. */ >> + >> +/* A SYMBOL_REF for __morestack. */ >> +static GTY(()) rtx morestack_ref; >> + >> +/* Emit -fsplit-stack prologue, which goes before the regular function >> + prologue. */ >> +void >> +aarch64_expand_split_stack_prologue (void) >> +{ >> + HOST_WIDE_INT frame_size, args_size; >> + rtx_code_label *ok_label = NULL; >> + rtx mem, ssvalue, compare, jump, insn, call_fusage; >> + rtx reg11, reg30, temp; >> + rtx new_cfa, cfi_ops = NULL; >> + /* Offset from thread pointer to __private_ss. */ >> + int psso = 0x10; >> + int ninsn; >> + >> + gcc_assert (flag_split_stack && reload_completed); >> + >> + /* It limits total maximum stack allocation on 2G so its value can be >> + materialized with two instruction at most (movn/movk). It might be >> + used by the linker to add some extra space for split calling non split >> + stack functions. 
*/ >> + frame_size = cfun->machine->frame.frame_size; >> + if (frame_size > ((HOST_WIDE_INT) 1 << 31)) >> + { >> + sorry ("Stack frame larger than 2G is not supported for >> -fsplit-stack"); >> + return; >> + } >> + >> + if (morestack_ref == NULL_RTX) >> + { >> + morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); >> + SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL >> + | SYMBOL_FLAG_FUNCTION); >> + } >> + >> + /* Load __private_ss from TCB. */ >> + ssvalue = gen_rtx_REG (Pmode, R9_REGNUM); >> + emit_insn (gen_aarch64_load_tp_hard (ssvalue)); >> + mem = gen_rtx_MEM (Pmode, plus_constant (Pmode, ssvalue, psso)); >> + emit_move_insn (ssvalue, mem); >> + >> + temp = gen_rtx_REG (Pmode, R10_REGNUM); >> + >> + /* Always emit two insns to calculate the requested stack, so the linker >> + can edit them when adjusting size for calling non-split-stack code. */ >> + ninsn = aarch64_internal_mov_immediate (temp, GEN_INT (-frame_size), true, >> + Pmode); >> + gcc_assert (ninsn == 1 || ninsn == 2); >> + if (ninsn == 1) >> + emit_insn (gen_nop ()); Will there be trouble for the linker if the following add is scheduled before the nop? >> diff --git a/libgcc/config/aarch64/morestack.S >> b/libgcc/config/aarch64/morestack.S >> new file mode 100644 >> ... >> + # Set __private_ss stack guard for the new stack. >> + ldr x9, [x28, STACKFRAME_BASE + NEWSTACK_SAVE] >> + add x0, x0, BACKOFF >> + sub x0, x0, 16 >> + sub x0, x0, x9 >> +.LEHB0: >> + mrs x1, tpidr_el0 >> + str x0, [x1, TCB_PRIVATE_SS] >> + >> + # void __morestack_unblock_signals (void) >> + bl __morestack_unblock_signals >> + >> + # Set up for a call to the target function. 
>> + #ldp x29, x30, [x28, STACKFRAME_BASE] >> + ldr x30, [x28, STACKFRAME_BASE + 8] >> + ldp x0, x1, [x28, STACKFRAME_BASE + 16] >> + ldp x2, x3, [x28, STACKFRAME_BASE + 32] >> + ldp x4, x5, [x28, STACKFRAME_BASE + 48] >> + ldp x6, x7, [x28, STACKFRAME_BASE + 64] >> + add x9, x30, 8 >> + cmp x30, x9 Can you explain why we need this "cmp" before jumping to the target function? >> + blr x9 >> + >> + stp x0, x1, [x28, STACKFRAME_BASE + 16] >> + stp x2, x3, [x28, STACKFRAME_BASE + 32] >> + stp x4, x5, [x28, STACKFRAME_BASE + 48] >> + stp x6, x7, [x28, STACKFRAME_BASE + 64] >> + >> + bl __morestack_block_signals >> + >> + # void *__generic_releasestack (size_t *pavailable) >> + add x0, x28, STACKFRAME_BASE + NEWSTACK_SAVE >> + bl __generic_releasestack >> + >> + # Reset __private_ss stack guard to value for old stack >> + ldr x9, [x28, STACKFRAME_BASE + NEWSTACK_SAVE] >> + add x0, x0, BACKOFF >> + sub x0, x0, x9 >> + >> + # Update TCB split stack field >> +.LEHE0: >> + mrs x1, tpidr_el0 >> + str x0, [x1, TCB_PRIVATE_SS] >> + >> + bl __morestack_unblock_signals >> + >> + # Use old stack again. >> + sub sp, x28, 16 >> + >> + ldp x0, x1, [x28, STACKFRAME_BASE + 16] >> + ldp x2, x3, [x28, STACKFRAME_BASE + 32] >> + ldp x4, x5, [x28, STACKFRAME_BASE + 48] >> + ldp x6, x7, [x28, STACKFRAME_BASE + 64] >> + ldp x29, x30, [x28, STACKFRAME_BASE] >> + ldr x28, [x28, STACKFRAME_BASE + 96] >> + >> + .cfi_remember_state >> + .cfi_restore 30 >> + .cfi_restore 29 >> + .cfi_def_cfa 31, 0 >> + >> + ret >> + >> +# This is the cleanup code called by the stack unwinder when >> +# unwinding through code between .LEHB0 and .LEHE0 above. 
>> +cleanup: >> + .cfi_restore_state >> + str x0, [x28, STACKFRAME_BASE] >> + # size_t __generic_findstack (void *stack) >> + mov x0, x28 >> + bl __generic_findstack >> + sub x0, x28, x0 >> + add x0, x0, BACKOFF >> + # Restore tcbhead_t.__private_ss >> + mrs x1, tpidr_el0 >> + str x0, [x1, TCB_PRIVATE_SS] >> + ldr x0, [x28, STACKFRAME_BASE] >> + b _Unwind_Resume >> + .cfi_endproc >> +END(__morestack) >> + >> + .section .gcc_except_table,"a",@progbits >> + .align 4 >> +.LLSDA1: >> + # @LPStart format (omit) >> + .byte 0xff >> + # @TType format (omit) >> + .byte 0xff >> + # Call-site format (uleb128) >> + .byte 0x1 >> + # Call-site table length >> + .uleb128 .LLSDACSE1-.LLSDACSB1 >> +.LLSDACSB1: >> + # region 0 start >> + .uleb128 .LEHB0-.LFB1 >> + # length >> + .uleb128 .LEHE0-.LEHB0 >> + # landing pad >> + .uleb128 cleanup-.LFB1 >> + # no action (ie a cleanup) >> + .uleb128 0 >> +.LLSDACSE1: >> + >> + >> + .global __gcc_personality_v0 >> +#ifdef __PIC__ >> + # Build a position independent reference to the personality function. >> + .hidden DW.ref.__gcc_personality_v0 >> + .weak DW.ref.__gcc_personality_v0 >> + .section >> .data.DW.ref.__gcc_personality_v0,"awG",@progbits,DW.ref.__gcc_personality_v0,comdat >> + .type DW.ref.__gcc_personality_v0, @object >> + .align 3 >> +DW.ref.__gcc_personality_v0: >> + .size DW.ref.__gcc_personality_v0, 8 >> + .quad __gcc_personality_v0 >> +#endif >> + >> + .section .note.GNU-stack,"",@progbits >> + .section .note.GNU-split-stack,"",@progbits >> + .section .note.GNU-no-split-stack,"",@progbits >> diff --git a/libgcc/config/aarch64/t-stack-aarch64 >> b/libgcc/config/aarch64/t-stack-aarch64 >> new file mode 100644 >> index 0000000..4babb4e >> --- /dev/null >> +++ b/libgcc/config/aarch64/t-stack-aarch64 >> @@ -0,0 +1,3 @@ >> +# Makefile fragment to support -fsplit-stack for aarch64. 
>> +LIB2ADD_ST += $(srcdir)/config/aarch64/morestack.S \ >> + $(srcdir)/config/aarch64/morestack-c.c >> diff --git a/libgcc/generic-morestack.c b/libgcc/generic-morestack.c >> index b8eec4e..fe7092b 100644 >> --- a/libgcc/generic-morestack.c >> +++ b/libgcc/generic-morestack.c >> @@ -943,6 +943,7 @@ __splitstack_find (void *segment_arg, void *sp, size_t >> *len, >> nsp -= 2 * 160; >> #elif defined __s390__ >> nsp -= 2 * 96; >> +#elif defined __aarch64__ >> #else >> #error "unrecognized target" >> #endif >> -- Regards, Jiong