Hi, In r14-6604-gd7ee988c491cde43d04fe25f2b3dbad9d85ded45 we changed the CFI notes attached to callee saves (in aarch64_save_callee_saves). That patch changed the ldp/stp representation to use unspecs instead of PARALLEL moves. This meant that we needed to attach CFI notes to all frame-related pair saves such that dwarf2cfi could still emit the appropriate CFI (it cannot interpret the unspecs directly). The patch also attached REG_CFA_OFFSET notes to individual saves so that the ldp/stp pass could easily preserve them when forming stps.
In that change I chose to use REG_CFA_OFFSET, but as the PR shows, that choice was problematic in that REG_CFA_OFFSET requires the attached store to be expressed in terms of the current CFA register at all times. This means that even scheduling of frame-related insns can break this invariant, leading to ICEs in dwarf2cfi. The old behaviour (before that change) allowed dwarf2cfi to interpret the RTL directly for sp-relative saves. This change restores that behaviour by using REG_FRAME_RELATED_EXPR instead of REG_CFA_OFFSET. REG_FRAME_RELATED_EXPR effectively just gives a different pattern for dwarf2cfi to look at instead of the main insn pattern. That allows us to attach the old-style PARALLEL move representation in a REG_FRAME_RELATED_EXPR note and means we are free to always express the save addresses in terms of the stack pointer. Since the ldp/stp fusion pass can combine frame-related stores, this patch also updates it to preserve REG_FRAME_RELATED_EXPR notes, and additionally gives it the ability to synthesize those notes when combining sp-relative saves into an stp (the latter always needs a note due to the unspec representation, the former does not). Bootstrapped/regetested on aarch64-linux-gnu, OK for trunk? Thanks, Alex gcc/ChangeLog: PR target/113077 * config/aarch64/aarch64-ldp-fusion.cc (filter_notes): Add fr_expr param to extract REG_FRAME_RELATED_EXPR notes. (combine_reg_notes): Handle REG_FRAME_RELATED_EXPR notes, and synthesize these if needed. Update caller ... (ldp_bb_info::fuse_pair): ... here. * config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use REG_FRAME_RELATED_EXPR instead of REG_CFA_OFFSET. gcc/testsuite/ChangeLog: PR target/113077 * gcc.target/aarch64/pr113077.c: New test.
diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc b/gcc/config/aarch64/aarch64-ldp-fusion.cc index 2fe1b1d4d84..00bc8b749c8 100644 --- a/gcc/config/aarch64/aarch64-ldp-fusion.cc +++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc @@ -904,9 +904,11 @@ aarch64_operand_mode_for_pair_mode (machine_mode mode) // Go through the reg notes rooted at NOTE, dropping those that we should drop, // and preserving those that we want to keep by prepending them to (and // returning) RESULT. EH_REGION is used to make sure we have at most one -// REG_EH_REGION note in the resulting list. +// REG_EH_REGION note in the resulting list. FR_EXPR is used to return any +// REG_FRAME_RELATED_EXPR note we find, as these can need special handling in +// combine_reg_notes. static rtx -filter_notes (rtx note, rtx result, bool *eh_region) +filter_notes (rtx note, rtx result, bool *eh_region, rtx *fr_expr) { for (; note; note = XEXP (note, 1)) { @@ -940,6 +942,10 @@ filter_notes (rtx note, rtx result, bool *eh_region) copy_rtx (XEXP (note, 0)), result); break; + case REG_FRAME_RELATED_EXPR: + gcc_assert (!*fr_expr); + *fr_expr = copy_rtx (XEXP (note, 0)); + break; default: // Unexpected REG_NOTE kind. gcc_unreachable (); @@ -951,13 +957,65 @@ filter_notes (rtx note, rtx result, bool *eh_region) // Return the notes that should be attached to a combination of I1 and I2, where // *I1 < *I2. +// +// LOAD_P is true for loads, REVERSED is true if the insns in +// program order are not in offset order, BASE_REGNO is the chosen base +// register number for the pair, and PATS gives the final RTL patterns for the +// accesses. static rtx -combine_reg_notes (insn_info *i1, insn_info *i2) +combine_reg_notes (insn_info *i1, insn_info *i2, + bool load_p, bool reversed, + int base_regno, rtx pats[2]) { + // Temporary storage for REG_FRAME_RELATED_EXPR notes. + rtx fr_expr[2] = {}; + bool found_eh_region = false; rtx result = NULL_RTX; - result = filter_notes (REG_NOTES (i2->rtl ()), result, &found_eh_region); - return filter_notes (REG_NOTES (i1->rtl ()), result, &found_eh_region); + result = filter_notes (REG_NOTES (i2->rtl ()), result, + &found_eh_region, fr_expr); + result = filter_notes (REG_NOTES (i1->rtl ()), result, + &found_eh_region, fr_expr + 1); + + if (!load_p) + { + // Frame-related saves must either be sp-based or must already have + // a REG_FRAME_RELATED_EXPR note. + if (RTX_FRAME_RELATED_P (i1->rtl ()) && !fr_expr[0]) + { + gcc_assert (base_regno == SP_REGNUM); + fr_expr[0] = copy_rtx (pats[reversed]); + } + if (RTX_FRAME_RELATED_P (i2->rtl ()) && !fr_expr[1]) + { + gcc_assert (base_regno == SP_REGNUM); + fr_expr[1] = copy_rtx (pats[!reversed]); + } + } + + // We just built FR_EXPR in program order, now we want it in + // offset order. + if (reversed) + std::swap (fr_expr[0], fr_expr[1]); + + rtx fr_pat = NULL_RTX; + if (fr_expr[0] && fr_expr[1]) + { + // Combining two frame-related insns, need to construct + // a REG_FRAME_RELATED_EXPR note which represents the combined + // operation. + RTX_FRAME_RELATED_P (fr_expr[1]) = 1; + fr_pat = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, fr_expr[0], fr_expr[1])); + } + else + fr_pat = fr_expr[0] ? fr_expr[0] : fr_expr[1]; + + if (fr_pat) + result = alloc_reg_note (REG_FRAME_RELATED_EXPR, + fr_pat, result); + + return result; } // Given two memory accesses in PATS, at least one of which is of a @@ -1380,7 +1438,8 @@ ldp_bb_info::fuse_pair (bool load_p, return false; } - rtx reg_notes = combine_reg_notes (first, second); + rtx reg_notes = combine_reg_notes (first, second, load_p, + i1 != first, base_regno, pats); rtx pair_pat; if (writeback_effect) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index a5a6b52730d..7b60e874334 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -8465,7 +8465,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, emit_move_insn (move_src, gen_int_mode (aarch64_sve_vg, DImode)); } rtx base_rtx = stack_pointer_rtx; - poly_int64 cfa_offset = offset; + poly_int64 sp_offset = offset; HOST_WIDE_INT const_offset; if (mode == VNx2DImode && BYTES_BIG_ENDIAN) @@ -8490,17 +8490,12 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, offset -= fp_offset; } rtx mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); + rtx cfi_mem = gen_frame_mem (mode, plus_constant (Pmode, + stack_pointer_rtx, + sp_offset)); + rtx cfi_set = gen_rtx_SET (cfi_mem, reg); + bool need_cfi_note_p = (base_rtx != stack_pointer_rtx); - rtx cfa_base = stack_pointer_rtx; - if (hard_fp_valid_p && frame_pointer_needed) - { - cfa_base = hard_frame_pointer_rtx; - cfa_offset += (bytes_below_sp - frame.bytes_below_hard_fp); - } - - rtx cfa_mem = gen_frame_mem (mode, - plus_constant (Pmode, - cfa_base, cfa_offset)); unsigned int regno2; if (!aarch64_sve_mode_p (mode) && reg == move_src @@ -8514,34 +8509,48 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, offset += GET_MODE_SIZE (mode); insn = emit_insn (aarch64_gen_store_pair (mem, reg, reg2)); - /* The first part of a frame-related parallel insn is - always assumed to be relevant to the frame - calculations; subsequent parts, are only - frame-related if explicitly marked. */ + rtx cfi_mem2 + = gen_frame_mem (mode, + plus_constant (Pmode, + stack_pointer_rtx, + sp_offset + GET_MODE_SIZE (mode))); + rtx cfi_set2 = gen_rtx_SET (cfi_mem2, reg2); + + /* The first part of a frame-related parallel insn is always + assumed to be relevant to the frame calculations; + subsequent parts, are only frame-related if + explicitly marked. */ if (aarch64_emit_cfi_for_reg_p (regno2)) - { - const auto off = cfa_offset + GET_MODE_SIZE (mode); - rtx cfa_mem2 = gen_frame_mem (mode, - plus_constant (Pmode, - cfa_base, - off)); - add_reg_note (insn, REG_CFA_OFFSET, - gen_rtx_SET (cfa_mem2, reg2)); - } + RTX_FRAME_RELATED_P (cfi_set2) = 1; + + /* Add a REG_FRAME_RELATED_EXPR note since the unspec + representation of stp cannot be understood directly by + dwarf2cfi. */ + rtx par = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, + cfi_set, cfi_set2)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, par); regno = regno2; ++i; } - else if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - insn = emit_insn (gen_aarch64_pred_mov (mode, mem, ptrue, move_src)); - else if (aarch64_sve_mode_p (mode)) - insn = emit_insn (gen_rtx_SET (mem, move_src)); else - insn = emit_move_insn (mem, move_src); + { + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + { + insn = emit_insn (gen_aarch64_pred_mov (mode, mem, ptrue, move_src)); + need_cfi_note_p = true; + } + else if (aarch64_sve_mode_p (mode)) + insn = emit_insn (gen_rtx_SET (mem, move_src)); + else + insn = emit_move_insn (mem, move_src); + + if (frame_related_p && (need_cfi_note_p || move_src != reg)) + add_reg_note (insn, REG_FRAME_RELATED_EXPR, cfi_set); + } RTX_FRAME_RELATED_P (insn) = frame_related_p; - if (frame_related_p) - add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (cfa_mem, reg)); /* Emit a fake instruction to indicate that the VG save slot has been initialized. */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr113077.c b/gcc/testsuite/gcc.target/aarch64/pr113077.c new file mode 100644 index 00000000000..dca202bd2ba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr113077.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O2 -fstack-protector-strong -fstack-clash-protection" } */ +void add_key(const void *payload); +void act_keyctl_test(void) { + char buf[1030 * 1024]; + int i = 0; + for (i = 0; i < sizeof(buf); i++) + { + add_key(buf); + } +}