https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118615
--- Comment #27 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
So the -fcompare-debug issue is because curr_insn is a JUMP_INSN:
(call_insn 10013 3 4 3 (parallel [
(call (mem:DI (symbol_ref:DI ("g") [flags 0x41] <function_decl
0x7fffea2d2000 g>) [0 g S8 A8])
(const_int 0 [0]))
(unspec:DI [
(const_int 2 [0x2])
] UNSPEC_CALLEE_ABI)
(clobber (reg:DI 30 x30))
]) "pr118615.c":8:5 63 {*call_insn}
(expr_list:REG_CALL_DECL (symbol_ref:DI ("g") [flags 0x41] <function_decl
0x7fffea2d2000 g>)
(nil))
(expr_list (clobber (reg:DI 17 x17))
(expr_list (clobber (reg:DI 16 x16))
(nil))))
(debug_insn 4 10013 10016 3 (debug_marker) "pr118615.c":9:5 -1
(nil))
(jump_insn 10016 4 10017 3 (set (pc)
(if_then_else (eq (reg/f:DI 109 [ m ])
(const_int 0 [0]))
(label_ref:DI 10025)
(pc))) "pr118615.c":9:8 37 {aarch64_cbeqdi1}
(int_list:REG_BR_PROB 499612076 (nil))
-> 10025)
and because of that before_p is true in:
before_p = (JUMP_P (curr_insn)
|| (CALL_P (curr_insn) && reg->type ==
OP_IN));
if (NONDEBUG_INSN_P (curr_insn)
&& (! JUMP_P (curr_insn) || reg->type == OP_IN)
&& split_if_necessary (src_regno, reg->biggest_mode,
potential_reload_hard_regs,
before_p, curr_insn, max_uid))
{
if (reg->subreg_p)
check_and_force_assignment_correctness_p = true;
change_p = true;
/* Invalidate. */
usage_insns[src_regno].check = 0;
if (before_p)
use_insn = PREV_INSN (curr_insn);
}
The -g vs. -g0 difference is that, after this, use_insn will be a DEBUG_INSN in
one case (-g) and the CALL_INSN preceding that DEBUG_INSN in the other (-g0).
And things just go wild from there. I guess the assumption was that if
split_if_necessary returns true and before_p is true as well, it must have
added at least one real insn before curr_insn; that assumption is now violated.
Changing the patch to:
--- gcc/lra-constraints.cc.jj 2025-03-19 19:20:41.644440691 +0100
+++ gcc/lra-constraints.cc 2025-03-20 18:40:04.188299643 +0100
@@ -152,6 +152,9 @@ static machine_mode curr_operand_mode[MA
(e.g. constant) and whose subreg is given operand of the current
insn. VOIDmode in all other cases. */
static machine_mode original_subreg_reg_mode[MAX_RECOG_OPERANDS];
+/* The first call insn after curr_insn within the EBB during inherit_in_ebb
+ or NULL outside of that function. */
+static rtx_insn *first_call_insn;
@@ -6373,12 +6376,26 @@ split_reg (bool before_p, int original_r
lra_process_new_insns (as_a <rtx_insn *> (usage_insn),
after_p ? NULL : restore,
after_p ? restore : NULL,
- call_save_p
- ? "Add reg<-save" : "Add reg<-split");
- lra_process_new_insns (insn, before_p ? save : NULL,
- before_p ? NULL : save,
- call_save_p
- ? "Add save<-reg" : "Add split<-reg");
+ call_save_p ? "Add reg<-save" : "Add reg<-split");
+ if (call_save_p
+ && first_call_insn != NULL
+ && BLOCK_FOR_INSN (first_call_insn) != BLOCK_FOR_INSN (insn))
+ /* PR116028: If original_regno is a pseudo that has been assigned a
+ call-save hard register, then emit the spill insn before the call
+ insn 'first_call_insn' instead of adjacent to 'insn'. If 'insn'
+ and 'first_call_insn' belong to the same EBB but to two separate
+ BBs, and if 'insn' is present in the entry BB, then generating the
+ spill insn in the entry BB can prevent shrink wrap from happening.
+ This is because the spill insn references the stack pointer and
+ hence the prolog gets generated in the entry BB itself. It is
+ also more efficient to generate the spill before
+ 'first_call_insn' as the spill now occurs only in the path
+ containing the call. */
+ lra_process_new_insns (first_call_insn, save, NULL, "Add save<-reg");
+ else
+ lra_process_new_insns (insn, before_p ? save : NULL,
+ before_p ? NULL : save,
+ call_save_p ? "Add save<-reg" : "Add split<-reg");
if (nregs > 1 || original_regno < FIRST_PSEUDO_REGISTER)
/* If we are trying to split multi-register. We should check
conflicts on the next assignment sub-pass. IRA can allocate on
@@ -6484,7 +6501,7 @@ split_if_necessary (int regno, machine_m
&& (INSN_UID (XEXP (next_usage_insns, 0)) < max_uid)))
&& need_for_split_p (potential_reload_hard_regs, regno + i)
&& split_reg (before_p, regno + i, insn, next_usage_insns, NULL))
- res = true;
+ res = true;
return res;
}
@@ -6862,6 +6879,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn
last_processed_bb = NULL;
CLEAR_HARD_REG_SET (potential_reload_hard_regs);
live_hard_regs = eliminable_regset | lra_no_alloc_regs;
+ first_call_insn = NULL;
/* We don't process new insns generated in the loop. */
for (curr_insn = tail; curr_insn != PREV_INSN (head); curr_insn = prev_insn)
{
@@ -7074,6 +7092,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn
last_call_for_abi[callee_abi.id ()] = calls_num;
full_and_partial_call_clobbers
|= callee_abi.full_and_partial_reg_clobbers ();
+ first_call_insn = curr_insn;
if ((cheap = find_reg_note (curr_insn,
REG_RETURNED, NULL_RTX)) != NULL_RTX
&& ((cheap = XEXP (cheap, 0)), true)
@@ -7142,6 +7161,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn
{
bool before_p;
rtx_insn *use_insn = curr_insn;
+ rtx_insn *prev_insn = PREV_INSN (curr_insn);
before_p = (JUMP_P (curr_insn)
|| (CALL_P (curr_insn) && reg->type ==
OP_IN));
@@ -7156,7 +7176,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn
change_p = true;
/* Invalidate. */
usage_insns[src_regno].check = 0;
- if (before_p)
+ if (before_p && PREV_INSN (curr_insn) != prev_insn)
use_insn = PREV_INSN (curr_insn);
}
if (NONDEBUG_INSN_P (curr_insn))
@@ -7278,6 +7298,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn
}
}
}
+ first_call_insn = NULL;
return change_p;
}
makes the -fcompare-debug issue go away.
The big question is whether the wrong-code issues that were reported first go
away with this version of the patch, which doesn't randomly use an unrelated
CALL_INSN when not called from within inherit_in_ebb and has the xstormy
BLOCK_FOR_INSN check.