[gcc r15-436] [PR115013][LRA]: Modify register starvation recognition

2024-05-13 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:44e7855e4e817a7f5a1e332cd95e780e57052dba

commit r15-436-g44e7855e4e817a7f5a1e332cd95e780e57052dba
Author: Vladimir N. Makarov 
Date:   Mon May 13 10:12:11 2024 -0400

[PR115013][LRA]: Modify register starvation recognition

  My recent patch to recognize reg starvation resulted in a few GCC test
failures.  The following patch fixes this by using a more accurate
starvation calculation and ignoring small reg classes.

gcc/ChangeLog:

PR rtl-optimization/115013
* lra-constraints.cc (process_alt_operands): Update all_used_nregs
only for winreg.  Ignore reg starvation for small reg classes.

Diff:
---
 gcc/lra-constraints.cc | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index e945a4da4519..92b343fa99a0 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -2674,8 +2674,9 @@ process_alt_operands (int only_alternative)
  if (early_clobber_p
  || curr_static_id->operand[nop].type != OP_OUT)
{
- all_used_nregs
-   += ira_reg_class_min_nregs[this_alternative][mode];
+ if (winreg)
+   all_used_nregs
+ += ira_reg_class_min_nregs[this_alternative][mode];
  all_this_alternative
= (reg_class_subunion
   [all_this_alternative][this_alternative]);
@@ -3250,6 +3251,7 @@ process_alt_operands (int only_alternative)
  overall += LRA_MAX_REJECT;
}
   if (all_this_alternative != NO_REGS
+ && !SMALL_REGISTER_CLASS_P (all_this_alternative)
  && all_used_nregs != 0 && all_reload_nregs != 0
  && (all_used_nregs + all_reload_nregs + 1
  >= ira_class_hard_regs_num[all_this_alternative]))


[gcc r15-364] [PR114942][LRA]: Don't reuse input reload reg of inout early clobber operand

2024-05-10 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:9585317f0715699197b1313bbf939c6ea3c1ace6

commit r15-364-g9585317f0715699197b1313bbf939c6ea3c1ace6
Author: Vladimir N. Makarov 
Date:   Fri May 10 09:15:50 2024 -0400

[PR114942][LRA]: Don't reuse input reload reg of inout early clobber operand

  The insn in question has the same reg in an inout operand and an input
operand.  The inout operand is early clobber.  LRA reused the input reload
reg of the inout operand for the input operand, which is wrong.  It
would be a good decision if the inout operand were not an early clobber one.
The patch rejects the reuse for the PR test case.

gcc/ChangeLog:

PR target/114942
* lra-constraints.cc (struct input_reload): Add new member 
early_clobber_p.
(get_reload_reg): Add new arg early_clobber_p, don't reuse input
reload with true early_clobber_p member value, use the arg for new
element of curr_insn_input_reloads.
(match_reload): Assign false to early_clobber_p member.
(process_addr_reg, simplify_operand_subreg, curr_insn_transform):
Adjust get_reload_reg calls.

gcc/testsuite/ChangeLog:

PR target/114942
* gcc.target/i386/pr114942.c: New.

Diff:
---
 gcc/lra-constraints.cc   | 27 +++
 gcc/testsuite/gcc.target/i386/pr114942.c | 24 
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 5b78fd0b7e5c..e945a4da4519 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -599,6 +599,8 @@ struct input_reload
 {
   /* True for input reload of matched operands.  */
   bool match_p;
+  /* True for input reload of inout earlyclobber operand.  */
+  bool early_clobber_p;
   /* Reloaded value.  */
   rtx input;
   /* Reload pseudo used.  */
@@ -649,13 +651,15 @@ canonicalize_reload_addr (rtx addr)
 /* Create a new pseudo using MODE, RCLASS, EXCLUDE_START_HARD_REGS, ORIGINAL or
reuse an existing reload pseudo.  Don't reuse an existing reload pseudo if
IN_SUBREG_P is true and the reused pseudo should be wrapped up in a SUBREG.
+   EARLY_CLOBBER_P is true for input reload of inout early clobber operand.
The result pseudo is returned through RESULT_REG.  Return TRUE if we created
a new pseudo, FALSE if we reused an existing reload pseudo.  Use TITLE to
describe new registers for debug purposes.  */
 static bool
 get_reload_reg (enum op_type type, machine_mode mode, rtx original,
enum reg_class rclass, HARD_REG_SET *exclude_start_hard_regs,
-   bool in_subreg_p, const char *title, rtx *result_reg)
+   bool in_subreg_p, bool early_clobber_p,
+   const char *title, rtx *result_reg)
 {
   int i, regno;
   enum reg_class new_class;
@@ -703,6 +707,7 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx 
original,
 for (i = 0; i < curr_insn_input_reloads_num; i++)
   {
if (! curr_insn_input_reloads[i].match_p
+   && ! curr_insn_input_reloads[i].early_clobber_p
&& rtx_equal_p (curr_insn_input_reloads[i].input, original)
&& in_class_p (curr_insn_input_reloads[i].reg, rclass, _class))
  {
@@ -750,6 +755,8 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx 
original,
   lra_assert (curr_insn_input_reloads_num < LRA_MAX_INSN_RELOADS);
   curr_insn_input_reloads[curr_insn_input_reloads_num].input = original;
   curr_insn_input_reloads[curr_insn_input_reloads_num].match_p = false;
+  curr_insn_input_reloads[curr_insn_input_reloads_num].early_clobber_p
+= early_clobber_p;
   curr_insn_input_reloads[curr_insn_input_reloads_num++].reg = *result_reg;
   return true;
 }
@@ -1189,6 +1196,7 @@ match_reload (signed char out, signed char *ins, signed 
char *outs,
   lra_assert (curr_insn_input_reloads_num < LRA_MAX_INSN_RELOADS);
   curr_insn_input_reloads[curr_insn_input_reloads_num].input = in_rtx;
   curr_insn_input_reloads[curr_insn_input_reloads_num].match_p = true;
+  curr_insn_input_reloads[curr_insn_input_reloads_num].early_clobber_p = false;
   curr_insn_input_reloads[curr_insn_input_reloads_num++].reg = new_in_reg;
   for (i = 0; (in = ins[i]) >= 0; i++)
 if (GET_MODE (*curr_id->operand_loc[in]) == VOIDmode
@@ -1577,7 +1585,7 @@ process_addr_reg (rtx *loc, bool check_only_p, rtx_insn 
**before, rtx_insn **aft
  reg = *loc;
  if (get_reload_reg (after == NULL ? OP_IN : OP_INOUT,
  mode, reg, cl, NULL,
- subreg_p, "address", _reg))
+ subreg_p, false, "address", _reg))
before_p = true;
}
   else if (new_class != NO_REGS && rclass != new_class)
@@ -1733,7 +1741,7 @@ simplify_operand_subreg (int nop, machine_mode reg_mode)
= (enum reg_class) targetm.preferred_reload_class (reg, ALL_REGS);
  if 

[gcc r13-8740] [PR114415][scheduler]: Fixing wrong code generation

2024-05-09 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:e30211cb0b3a2b88959e9bc40626a17461de52de

commit r13-8740-ge30211cb0b3a2b88959e9bc40626a17461de52de
Author: Vladimir N. Makarov 
Date:   Thu Apr 4 16:04:04 2024 -0400

[PR114415][scheduler]: Fixing wrong code generation

  For the test case, the insn scheduler (working for live range
shrinkage) moves insns modifying stack memory before an insn reserving
the stack memory.  Comments in the patch contain more details about
the problem and its solution.

gcc/ChangeLog:

PR rtl-optimization/114415
* sched-deps.cc (add_insn_mem_dependence): Add memory check for mem 
argument.
(sched_analyze_1): Treat stack pointer modification as memory read.
(sched_analyze_2, sched_analyze_insn): Add memory guard for 
processing pending_read_mems.
* sched-int.h (deps_desc): Add comment to pending_read_mems.

gcc/testsuite/ChangeLog:

PR rtl-optimization/114415
* gcc.target/i386/pr114415.c: New test.

Diff:
---
 gcc/sched-deps.cc| 49 +---
 gcc/sched-int.h  |  4 ++-
 gcc/testsuite/gcc.target/i386/pr114415.c | 47 ++
 3 files changed, 83 insertions(+), 17 deletions(-)

diff --git a/gcc/sched-deps.cc b/gcc/sched-deps.cc
index 2aa6623ad2ea..2104895f3009 100644
--- a/gcc/sched-deps.cc
+++ b/gcc/sched-deps.cc
@@ -1735,7 +1735,7 @@ add_insn_mem_dependence (class deps_desc *deps, bool 
read_p,
   insn_node = alloc_INSN_LIST (insn, *insn_list);
   *insn_list = insn_node;
 
-  if (sched_deps_info->use_cselib)
+  if (sched_deps_info->use_cselib && MEM_P (mem))
 {
   mem = shallow_copy_rtx (mem);
   XEXP (mem, 0) = cselib_subst_to_values_from_insn (XEXP (mem, 0),
@@ -2458,6 +2458,25 @@ sched_analyze_1 (class deps_desc *deps, rtx x, rtx_insn 
*insn)
   FIRST_STACK_REG);
}
 #endif
+  if (!deps->readonly && regno == STACK_POINTER_REGNUM)
+   {
+ /* Please see PR114115.  We have insn modifying memory on the stack
+and not addressed by stack pointer and we have insn reserving the
+stack space.  If we move the insn modifying memory before insn
+reserving the stack space, we can change memory out of the red
+zone.  Even worse, some optimizations (e.g. peephole) can add
+insns using temporary stack slots before insn reserving the stack
+space but after the insn modifying memory.  This will corrupt the
+modified memory.  Therefore we treat insn changing the stack as
+reading unknown memory.  This will create anti-dependence.  We
+don't need to treat the insn as writing memory because GCC by
+itself does not generate code reading undefined stack memory.  */
+ if ((deps->pending_read_list_length + deps->pending_write_list_length)
+ >= param_max_pending_list_length
+ && !DEBUG_INSN_P (insn))
+   flush_pending_lists (deps, insn, true, true);
+ add_insn_mem_dependence (deps, true, insn, dest);
+   }
 }
   else if (MEM_P (dest))
 {
@@ -2498,10 +2517,11 @@ sched_analyze_1 (class deps_desc *deps, rtx x, rtx_insn 
*insn)
  pending_mem = deps->pending_read_mems;
  while (pending)
{
- if (anti_dependence (pending_mem->element (), t)
- && ! sched_insns_conditions_mutex_p (insn, pending->insn ()))
-   note_mem_dep (t, pending_mem->element (), pending->insn (),
- DEP_ANTI);
+ rtx mem = pending_mem->element ();
+ if (REG_P (mem)
+ || (anti_dependence (mem, t)
+ && ! sched_insns_conditions_mutex_p (insn, pending->insn 
(
+   note_mem_dep (t, mem, pending->insn (), DEP_ANTI);
 
  pending = pending->next ();
  pending_mem = pending_mem->next ();
@@ -2637,12 +2657,10 @@ sched_analyze_2 (class deps_desc *deps, rtx x, rtx_insn 
*insn)
pending_mem = deps->pending_read_mems;
while (pending)
  {
-   if (read_dependence (pending_mem->element (), t)
-   && ! sched_insns_conditions_mutex_p (insn,
-pending->insn ()))
- note_mem_dep (t, pending_mem->element (),
-   pending->insn (),
-   DEP_ANTI);
+   rtx mem = pending_mem->element ();
+   if (MEM_P (mem) && read_dependence (mem, t)
+   && ! sched_insns_conditions_mutex_p (insn, pending->insn 
()))
+ note_mem_dep (t, mem, pending->insn (), DEP_ANTI);
 
pending = pending->next ();
pending_mem = pending_mem->next ();
@@ -3026,8 +3044,7 @@ sched_analyze_insn (class 

[gcc r15-330] [PR114810][LRA]: Recognize alternatives with lack of available registers for insn and demote them.

2024-05-08 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:2f00e6caca1a14dfe26e94f608e9d79a787ebe08

commit r15-330-g2f00e6caca1a14dfe26e94f608e9d79a787ebe08
Author: Vladimir N. Makarov 
Date:   Wed May 8 10:39:04 2024 -0400

[PR114810][LRA]: Recognize alternatives with lack of available registers 
for insn and demote them.

  PR114810 was fixed in a machine-dependent way.  This patch fixes
the PR on the LRA side.  LRA chose an alternative with constraints `&r,r,ro`
on i686 when all operands are of DImode and there are only 6 available
general regs.  The patch recognizes such a case and significantly
increases the alternative cost.  It does not reject the alternative
completely.  So the fix is safe, but it might not work for all
potentially possible cases of register shortage, as register classes can
have any relations including subsets and intersections.

gcc/ChangeLog:

PR target/114810
* lra-constraints.cc (process_alt_operands): Calculate union reg
class for the alternative, peak matched regs and required reload
regs.  Recognize alternatives with lack of available registers and
make them costly.  Add debug print about this case.

Diff:
---
 gcc/lra-constraints.cc | 43 +--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 10e3d4e40977..5b78fd0b7e5c 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -2127,6 +2127,8 @@ process_alt_operands (int only_alternative)
   /* Numbers of operands which are early clobber registers.  */
   int early_clobbered_nops[MAX_RECOG_OPERANDS];
   enum reg_class curr_alt[MAX_RECOG_OPERANDS];
+  enum reg_class all_this_alternative;
+  int all_used_nregs, all_reload_nregs;
   HARD_REG_SET curr_alt_set[MAX_RECOG_OPERANDS];
   HARD_REG_SET curr_alt_exclude_start_hard_regs[MAX_RECOG_OPERANDS];
   bool curr_alt_match_win[MAX_RECOG_OPERANDS];
@@ -2229,7 +2231,8 @@ process_alt_operands (int only_alternative)
   curr_alt_out_sp_reload_p = false;
   curr_reuse_alt_p = true;
   curr_alt_class_change_p = false;
-  
+  all_this_alternative = NO_REGS;
+  all_used_nregs = all_reload_nregs = 0;
   for (nop = 0; nop < n_operands; nop++)
{
  const char *p;
@@ -2660,6 +2663,15 @@ process_alt_operands (int only_alternative)
  /* Record which operands fit this alternative.  */
  if (win)
{
+ if (early_clobber_p
+ || curr_static_id->operand[nop].type != OP_OUT)
+   {
+ all_used_nregs
+   += ira_reg_class_min_nregs[this_alternative][mode];
+ all_this_alternative
+   = (reg_class_subunion
+  [all_this_alternative][this_alternative]);
+   }
  this_alternative_win = true;
  if (class_change_p)
{
@@ -2781,7 +2793,19 @@ process_alt_operands (int only_alternative)
   & ~((ira_prohibited_class_mode_regs
[this_alternative][mode])
   | lra_no_alloc_regs));
- if (hard_reg_set_empty_p (available_regs))
+ if (!hard_reg_set_empty_p (available_regs))
+   {
+ if (early_clobber_p
+ || curr_static_id->operand[nop].type != OP_OUT)
+   {
+ all_reload_nregs
+   += ira_reg_class_min_nregs[this_alternative][mode];
+ all_this_alternative
+   = (reg_class_subunion
+  [all_this_alternative][this_alternative]);
+   }
+   }
+ else
{
  /* There are no hard regs holding a value of given
 mode.  */
@@ -3217,6 +3241,21 @@ process_alt_operands (int only_alternative)
 "Cycle danger: overall += LRA_MAX_REJECT\n");
  overall += LRA_MAX_REJECT;
}
+  if (all_this_alternative != NO_REGS
+ && all_used_nregs != 0 && all_reload_nregs != 0
+ && (all_used_nregs + all_reload_nregs + 1
+ >= ira_class_hard_regs_num[all_this_alternative]))
+   {
+ if (lra_dump_file != NULL)
+   fprintf
+ (lra_dump_file,
+  "Register starvation: overall += LRA_MAX_REJECT"
+  "(class=%s,avail=%d,used=%d,reload=%d)\n",
+  reg_class_names[all_this_alternative],
+  ira_class_hard_regs_num[all_this_alternative],
+  all_used_nregs, all_reload_nregs);
+ overall += LRA_MAX_REJECT;
+   }
   ok_p = true;
   curr_alt_dont_inherit_ops_num = 0;
   for (nop = 0; nop < early_clobbered_regs_num; nop++)


[gcc r14-9793] [PR114415][scheduler]: Fixing wrong code generation

2024-04-04 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:a24476422ba311b83737cf8bdc5892a7fc7514eb

commit r14-9793-ga24476422ba311b83737cf8bdc5892a7fc7514eb
Author: Vladimir N. Makarov 
Date:   Thu Apr 4 16:04:04 2024 -0400

[PR114415][scheduler]: Fixing wrong code generation

  For the test case, the insn scheduler (working for live range
shrinkage) moves insns modifying stack memory before an insn reserving
the stack memory.  Comments in the patch contain more details about
the problem and its solution.

gcc/ChangeLog:

PR rtl-optimization/114415
* sched-deps.cc (add_insn_mem_dependence): Add memory check for mem 
argument.
(sched_analyze_1): Treat stack pointer modification as memory read.
(sched_analyze_2, sched_analyze_insn): Add memory guard for 
processing pending_read_mems.
* sched-int.h (deps_desc): Add comment to pending_read_mems.

gcc/testsuite/ChangeLog:

PR rtl-optimization/114415
* gcc.target/i386/pr114415.c: New test.

Diff:
---
 gcc/sched-deps.cc| 49 +---
 gcc/sched-int.h  |  4 ++-
 gcc/testsuite/gcc.target/i386/pr114415.c | 47 ++
 3 files changed, 83 insertions(+), 17 deletions(-)

diff --git a/gcc/sched-deps.cc b/gcc/sched-deps.cc
index 5034e664e5e..4c668245049 100644
--- a/gcc/sched-deps.cc
+++ b/gcc/sched-deps.cc
@@ -1735,7 +1735,7 @@ add_insn_mem_dependence (class deps_desc *deps, bool 
read_p,
   insn_node = alloc_INSN_LIST (insn, *insn_list);
   *insn_list = insn_node;
 
-  if (sched_deps_info->use_cselib)
+  if (sched_deps_info->use_cselib && MEM_P (mem))
 {
   mem = shallow_copy_rtx (mem);
   XEXP (mem, 0) = cselib_subst_to_values_from_insn (XEXP (mem, 0),
@@ -2458,6 +2458,25 @@ sched_analyze_1 (class deps_desc *deps, rtx x, rtx_insn 
*insn)
   FIRST_STACK_REG);
}
 #endif
+  if (!deps->readonly && regno == STACK_POINTER_REGNUM)
+   {
+ /* Please see PR114115.  We have insn modifying memory on the stack
+and not addressed by stack pointer and we have insn reserving the
+stack space.  If we move the insn modifying memory before insn
+reserving the stack space, we can change memory out of the red
+zone.  Even worse, some optimizations (e.g. peephole) can add
+insns using temporary stack slots before insn reserving the stack
+space but after the insn modifying memory.  This will corrupt the
+modified memory.  Therefore we treat insn changing the stack as
+reading unknown memory.  This will create anti-dependence.  We
+don't need to treat the insn as writing memory because GCC by
+itself does not generate code reading undefined stack memory.  */
+ if ((deps->pending_read_list_length + deps->pending_write_list_length)
+ >= param_max_pending_list_length
+ && !DEBUG_INSN_P (insn))
+   flush_pending_lists (deps, insn, true, true);
+ add_insn_mem_dependence (deps, true, insn, dest);
+   }
 }
   else if (MEM_P (dest))
 {
@@ -2498,10 +2517,11 @@ sched_analyze_1 (class deps_desc *deps, rtx x, rtx_insn 
*insn)
  pending_mem = deps->pending_read_mems;
  while (pending)
{
- if (anti_dependence (pending_mem->element (), t)
- && ! sched_insns_conditions_mutex_p (insn, pending->insn ()))
-   note_mem_dep (t, pending_mem->element (), pending->insn (),
- DEP_ANTI);
+ rtx mem = pending_mem->element ();
+ if (REG_P (mem)
+ || (anti_dependence (mem, t)
+ && ! sched_insns_conditions_mutex_p (insn, pending->insn 
(
+   note_mem_dep (t, mem, pending->insn (), DEP_ANTI);
 
  pending = pending->next ();
  pending_mem = pending_mem->next ();
@@ -2637,12 +2657,10 @@ sched_analyze_2 (class deps_desc *deps, rtx x, rtx_insn 
*insn)
pending_mem = deps->pending_read_mems;
while (pending)
  {
-   if (read_dependence (pending_mem->element (), t)
-   && ! sched_insns_conditions_mutex_p (insn,
-pending->insn ()))
- note_mem_dep (t, pending_mem->element (),
-   pending->insn (),
-   DEP_ANTI);
+   rtx mem = pending_mem->element ();
+   if (MEM_P (mem) && read_dependence (mem, t)
+   && ! sched_insns_conditions_mutex_p (insn, pending->insn 
()))
+ note_mem_dep (t, mem, pending->insn (), DEP_ANTI);
 
pending = pending->next ();
pending_mem = pending_mem->next ();
@@ -3026,8 +3044,7 @@ sched_analyze_insn (class 

[gcc r14-9557] [PR99829][LRA]: Fixing LRA ICE on arm

2024-03-19 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:9c91f8a88b2db50c8faf70786d3cef27b39ac9fc

commit r14-9557-g9c91f8a88b2db50c8faf70786d3cef27b39ac9fc
Author: Vladimir N. Makarov 
Date:   Tue Mar 19 16:57:11 2024 -0400

[PR99829][LRA]: Fixing LRA ICE on arm

  LRA removed an insn setting an equivalence to memory whose output was
reloaded.  This resulted in writing an uninitialized value to the memory,
which triggered an assert in the LRA code checking the final generated code.
This patch fixes the problem.  A comment in the patch contains more
details about the problem and its solution.

gcc/ChangeLog:

PR target/99829
* lra-constraints.cc (lra_constraints): Prevent removing insn
with reverse equivalence to memory if the memory was reloaded.

Diff:
---
 gcc/lra-constraints.cc | 26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 0ae81c1ff9c..10e3d4e4097 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -5213,7 +5213,7 @@ lra_constraints (bool first_p)
   bool changed_p;
   int i, hard_regno, new_insns_num;
   unsigned int min_len, new_min_len, uid;
-  rtx set, x, reg, dest_reg;
+  rtx set, x, reg, nosubreg_dest;
   rtx_insn *original_insn;
   basic_block last_bb;
   bitmap_iterator bi;
@@ -5377,14 +5377,14 @@ lra_constraints (bool first_p)
{
  if ((set = single_set (curr_insn)) != NULL_RTX)
{
- dest_reg = SET_DEST (set);
+ nosubreg_dest = SET_DEST (set);
  /* The equivalence pseudo could be set up as SUBREG in a
 case when it is a call restore insn in a mode
 different from the pseudo mode.  */
- if (GET_CODE (dest_reg) == SUBREG)
-   dest_reg = SUBREG_REG (dest_reg);
- if ((REG_P (dest_reg)
-  && (x = get_equiv (dest_reg)) != dest_reg
+ if (GET_CODE (nosubreg_dest) == SUBREG)
+   nosubreg_dest = SUBREG_REG (nosubreg_dest);
+ if ((REG_P (nosubreg_dest)
+  && (x = get_equiv (nosubreg_dest)) != nosubreg_dest
   /* Remove insns which set up a pseudo whose value
  cannot be changed.  Such insns might be not in
  init_insns because we don't update equiv data
@@ -5403,11 +5403,21 @@ lra_constraints (bool first_p)
  up the equivalence.  */
   || in_list_p (curr_insn,
 ira_reg_equiv
-[REGNO (dest_reg)].init_insns)))
+[REGNO (nosubreg_dest)].init_insns)))
  || (((x = get_equiv (SET_SRC (set))) != SET_SRC (set))
  && in_list_p (curr_insn,
ira_reg_equiv
-   [REGNO (SET_SRC (set))].init_insns)))
+   [REGNO (SET_SRC (set))].init_insns)
+ /* This is a reverse equivalence to memory (see ira.cc)
+in store insn.  We can reload all the destination and
+have an output reload which is a store to memory.  If
+we just remove the insn, we will have the output
+reload storing an undefined value to the memory.
+Check that we did not reload the memory to prevent a
+wrong code generation.  We could implement using the
+equivalence still in such case but doing this is not
+worth the efforts as such case is very rare.  */
+ && MEM_P (nosubreg_dest)))
{
  /* This is equiv init insn of pseudo which did not get a
 hard register -- remove the insn.  */


[gcc r14-9401] [PR113790][LRA]: Fixing LRA ICE on riscv64

2024-03-08 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:cebbaa2a84586a7345837f74a53b7a0263bf29ee

commit r14-9401-gcebbaa2a84586a7345837f74a53b7a0263bf29ee
Author: Vladimir N. Makarov 
Date:   Fri Mar 8 14:48:33 2024 -0500

[PR113790][LRA]: Fixing LRA ICE on riscv64

  LRA failed to consider all insn alternatives when a non-reload pseudo
did not get a hard register.  This resulted in a failure to generate
code by LRA.  The patch fixes this problem.

gcc/ChangeLog:

PR target/113790
* lra-assigns.cc (assign_by_spills): Set up all_spilled_pseudos
for non-reload pseudo too.

Diff:
---
 gcc/lra-assigns.cc | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/gcc/lra-assigns.cc b/gcc/lra-assigns.cc
index d1b2b35ffc9..7dfa6f70941 100644
--- a/gcc/lra-assigns.cc
+++ b/gcc/lra-assigns.cc
@@ -1430,13 +1430,19 @@ assign_by_spills (void)
hard_regno = spill_for (regno, _spilled_pseudos, iter == 1);
  if (hard_regno < 0)
{
- if (reload_p) {
-   /* Put unassigned reload pseudo first in the
-  array.  */
-   regno2 = sorted_pseudos[nfails];
-   sorted_pseudos[nfails++] = regno;
-   sorted_pseudos[i] = regno2;
- }
+ if (reload_p)
+   {
+ /* Put unassigned reload pseudo first in the array.  */
+ regno2 = sorted_pseudos[nfails];
+ sorted_pseudos[nfails++] = regno;
+ sorted_pseudos[i] = regno2;
+   }
+ else
+   {
+ /* Consider all alternatives on the next constraint
+subpass.  */
+ bitmap_set_bit (_spilled_pseudos, regno);
+   }
}
  else
{