Hello, Richard,

Thanks for the review!

On Mar 31, 2022, Richard Sandiford <richard.sandif...@arm.com> wrote:

>> +    /* If the natural mode doesn't work, try some wider mode.  */
>> +    if (!targetm.hard_regno_mode_ok (regno, mode))
>> +      {
>> +        for (int nregs = 2;
>> +             regno + nregs <= FIRST_PSEUDO_REGISTER
>> +               && TEST_HARD_REG_BIT (need_zeroed_hardregs,
>> +                                     regno + nregs - 1);
>> +             nregs++)
>> +          {
>> +            mode = choose_hard_reg_mode (regno, nregs, 0);

> I like the idea, but it would be good to avoid the large:

>   FIRST_PSEUDO_REGISTER * FIRST_PSEUDO_REGISTER * NUM_MACHINE_MODES

> constant factor.

Entering the nregs loop, because the register can't be used in its
natural mode, is supposed to be an unusual case, not worth optimizing
much under Amdahl's law.  I gather the aggregate trip counts are
unlikely to hit the theoretical O(n^2) because registers that would take
the loop are rare and expected to be paired/grouped up.  If that
assumption doesn't hold, then a cap would indeed be desirable.

> How about if init_reg_modes_target recorded the maximum value of
> x_hard_regno_nregs?

I had thought of a cap but couldn't find one I was happy with, and in
the end I thought we didn't need one.  But this is indeed a good one to
use.  Thanks, I'm implementing it.

> This seems big enough to be worth splitting out into a helper, rather
> than repeating.

I had considered that, but it didn't seem to me it would bring an
improvement.  As it turns out, it does.  Thanks.

>> -            rtx zsrc = gen_rtx_REG (mode, src);
>> +            rtx src_rtx = (mode == GET_MODE (regno_reg_rtx[src])
>> +                           ? regno_reg_rtx[src]
>> +                           : gen_rtx_REG (mode, src));

> Is this needed?  The original gen_rtx_REG (mode, src) seems OK.

No, it's not needed, it's just an attempt to avoid allocating RTL that
we have handy.  This function could in theory make several attempts at
allocating rtl for each register in the shrinking pending set.  I
thought every saved bit could help.


Here's what I'm regstrapping on x86_64-linux-gnu, after verifying that
it does the job on the affected arm variant.  Ok to install, assuming no
surprises in the testing?


try multi-reg dest in default_zero_call_used_regs

From: Alexandre Oliva <ol...@adacore.com>

When the mode of regno_reg_rtx is not hard_regno_mode_ok for the
target, try grouping the register with subsequent ones.  This enables
s16 to s31 and their hidden pairs to be zeroed with the default logic
on some arm variants.


for  gcc/ChangeLog

        * targhooks.cc (default_zero_call_used_regs): Attempt to group
        regs that the target refuses to use in their natural modes.
        (zcur_select_mode_rtx): New.
        * regs.h (struct target_regs): Add x_hard_regno_max_nregs.
        (hard_regno_max_nregs): Define.
        * reginfo.cc (init_reg_modes_target): Set hard_regno_max_nregs.
---
 gcc/reginfo.cc   |    9 ++++--
 gcc/regs.h       |    5 +++
 gcc/targhooks.cc |   86 ++++++++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 89 insertions(+), 11 deletions(-)

diff --git a/gcc/reginfo.cc b/gcc/reginfo.cc
index 234f72eceeb25..67e30cab42855 100644
--- a/gcc/reginfo.cc
+++ b/gcc/reginfo.cc
@@ -441,10 +441,15 @@ init_reg_modes_target (void)
 {
   int i, j;
 
+  this_target_regs->x_hard_regno_max_nregs = 1;
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     for (j = 0; j < MAX_MACHINE_MODE; j++)
-      this_target_regs->x_hard_regno_nregs[i][j]
-       = targetm.hard_regno_nregs (i, (machine_mode) j);
+      {
+       unsigned char nregs = targetm.hard_regno_nregs (i, (machine_mode) j);
+       this_target_regs->x_hard_regno_nregs[i][j] = nregs;
+       if (nregs > this_target_regs->x_hard_regno_max_nregs)
+         this_target_regs->x_hard_regno_max_nregs = nregs;
+      }
 
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     {
diff --git a/gcc/regs.h b/gcc/regs.h
index 74f1f63770322..f72b06fb56508 100644
--- a/gcc/regs.h
+++ b/gcc/regs.h
@@ -202,6 +202,9 @@ struct target_regs {
      registers that a given machine mode occupies.  */
   unsigned char x_hard_regno_nregs[FIRST_PSEUDO_REGISTER][MAX_MACHINE_MODE];
 
+  /* The max value found in x_hard_regno_nregs.  */
+  unsigned char x_hard_regno_max_nregs;
+
   /* For each hard register, the widest mode object that it can contain.
      This will be a MODE_INT mode if the register can hold integers.  Otherwise
      it will be a MODE_FLOAT or a MODE_CC mode, whichever is valid for the
@@ -235,6 +238,8 @@ extern struct target_regs *this_target_regs;
 #else
 #define this_target_regs (&default_target_regs)
 #endif
+#define hard_regno_max_nregs \
+  (this_target_regs->x_hard_regno_max_nregs)
 #define reg_raw_mode \
   (this_target_regs->x_reg_raw_mode)
 #define have_regs_of_mode \
diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
index fc49235eb38ee..2681833e2ce79 100644
--- a/gcc/targhooks.cc
+++ b/gcc/targhooks.cc
@@ -1017,6 +1017,45 @@ default_function_value_regno_p (const unsigned int regno ATTRIBUTE_UNUSED)
 #endif
 }
 
+/* Choose the mode and rtx to use to zero REGNO, storing them in PMODE and
+   PREGNO_RTX and returning TRUE if successful, otherwise returning FALSE.  If
+   the natural mode for REGNO doesn't work, attempt to group it with subsequent
+   adjacent registers set in TOZERO.  */
+
+static inline bool
+zcur_select_mode_rtx (unsigned int regno, machine_mode *pmode,
+                     rtx *pregno_rtx, HARD_REG_SET tozero)
+{
+  rtx regno_rtx = regno_reg_rtx[regno];
+  machine_mode mode = GET_MODE (regno_rtx);
+
+  /* If the natural mode doesn't work, try some wider mode.  */
+  if (!targetm.hard_regno_mode_ok (regno, mode))
+    {
+      bool found = false;
+      for (int nregs = 2;
+          !found && nregs <= hard_regno_max_nregs
+            && regno + nregs <= FIRST_PSEUDO_REGISTER
+            && TEST_HARD_REG_BIT (tozero,
+                                  regno + nregs - 1);
+          nregs++)
+       {
+         mode = choose_hard_reg_mode (regno, nregs, 0);
+         if (mode == E_VOIDmode)
+           continue;
+         gcc_checking_assert (targetm.hard_regno_mode_ok (regno, mode));
+         regno_rtx = gen_rtx_REG (mode, regno);
+         found = true;
+       }
+      if (!found)
+       return false;
+    }
+
+  *pmode = mode;
+  *pregno_rtx = regno_rtx;
+  return true;
+}
+
 /* The default hook for TARGET_ZERO_CALL_USED_REGS.  */
 
 HARD_REG_SET
@@ -1035,16 +1074,28 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
     if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
       {
        rtx_insn *last_insn = get_last_insn ();
-       machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
+       rtx regno_rtx;
+       machine_mode mode;
+
+       if (!zcur_select_mode_rtx (regno, &mode, &regno_rtx,
+                                  need_zeroed_hardregs))
+         {
+           SET_HARD_REG_BIT (failed, regno);
+           continue;
+         }
+
        rtx zero = CONST0_RTX (mode);
-       rtx_insn *insn = emit_move_insn (regno_reg_rtx[regno], zero);
+       rtx_insn *insn = emit_move_insn (regno_rtx, zero);
        if (!valid_insn_p (insn))
          {
            SET_HARD_REG_BIT (failed, regno);
            delete_insns_since (last_insn);
          }
        else
-         progress = true;
+         {
+           progress = true;
+           regno += hard_regno_nregs (regno, mode) - 1;
+         }
       }
 
   /* Now retry with copies from zeroed registers, as long as we've
@@ -1060,7 +1111,18 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
       for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        if (TEST_HARD_REG_BIT (retrying, regno))
          {
-           machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
+           rtx regno_rtx;
+           machine_mode mode;
+
+           /* This might select registers we've already zeroed.  If grouping
+              with them is what it takes to get regno zeroed, so be it.  */
+           if (!zcur_select_mode_rtx (regno, &mode, &regno_rtx,
+                                      need_zeroed_hardregs))
+             {
+               SET_HARD_REG_BIT (failed, regno);
+               continue;
+             }
+
            bool success = false;
            /* Look for a source.  */
            for (unsigned int src = 0; src < FIRST_PSEUDO_REGISTER; src++)
@@ -1086,8 +1148,11 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
 
                /* SRC is usable, try to copy from it.  */
                rtx_insn *last_insn = get_last_insn ();
-               rtx zsrc = gen_rtx_REG (mode, src);
-               rtx_insn *insn = emit_move_insn (regno_reg_rtx[regno], zsrc);
+               /* Try to avoid allocating RTL that we have handy.  */
+               rtx src_rtx = (mode == GET_MODE (regno_reg_rtx[src])
+                              ? regno_reg_rtx[src]
+                              : gen_rtx_REG (mode, src));
+               rtx_insn *insn = emit_move_insn (regno_rtx, src_rtx);
                if (!valid_insn_p (insn))
                  /* It didn't work, remove any inserts.  We'll look
                     for another SRC.  */
@@ -1100,13 +1165,16 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
                  }
              }
 
-           /* If nothing worked for REGNO this round, marked it to be
+           /* If nothing worked for REGNO this round, mark it to be
               retried if we get another round.  */
            if (!success)
              SET_HARD_REG_BIT (failed, regno);
            else
-             /* Take note so as to enable another round if needed.  */
-             progress = true;
+             {
+               /* Take note so as to enable another round if needed.  */
+               progress = true;
+               regno += hard_regno_nregs (regno, mode) - 1;
+             }
          }
     }
 


-- 
Alexandre Oliva, happy hacker                https://FSFLA.org/blogs/lxo/
   Free Software Activist                       GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about <https://stallmansupport.org>

Reply via email to