From: Levy Hsu <ad...@levyhsu.com>

This expands sub-word loads as a zero/sign extended load, followed by
a subreg.  This helps eliminate unnecessary zero/sign extend insns after
the load, particularly for volatiles, but also in some other cases.
Testing shows that it gives consistent code size decreases.

Tested with riscv32-elf rv32imac/ilp32 and riscv64-linux rv64gc/lp64d
builds and checks.  Some -gsplit-dwarf tests fail with the patch, but
this turns out to be an existing bug with the split-dwarf support that
I hadn't noticed before.  It isn't a bug in this patch.  Ignoring that,
there are no regressions.

Committed.

        gcc/
        PR target/97417
        * config/riscv/riscv-shorten-memrefs.c (pass_shorten_memrefs): Add
        extend parameter to get_si_mem_base_reg declaration.
        (get_si_mem_base_reg): Add extend parameter.  Set it.
        (analyze): Pass extend arg to get_si_mem_base_reg.
        (transform): Likewise.  Use it when rewriting mems.
        * config/riscv/riscv.c (riscv_legitimize_move): Check for subword
        loads and emit sign/zero extending load followed by subreg move.
---
 gcc/config/riscv/riscv-shorten-memrefs.c | 34 +++++++++++++++++++-----
 gcc/config/riscv/riscv.c                 | 22 +++++++++++++++
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/riscv-shorten-memrefs.c b/gcc/config/riscv/riscv-shorten-memrefs.c
index b1b57f1b5e0..3f34065c1ce 100644
--- a/gcc/config/riscv/riscv-shorten-memrefs.c
+++ b/gcc/config/riscv/riscv-shorten-memrefs.c
@@ -75,12 +75,19 @@ private:
 
   regno_map * analyze (basic_block bb);
   void transform (regno_map *m, basic_block bb);
-  bool get_si_mem_base_reg (rtx mem, rtx *addr);
+  bool get_si_mem_base_reg (rtx mem, rtx *addr, bool *extend);
 }; // class pass_shorten_memrefs
 
 bool
-pass_shorten_memrefs::get_si_mem_base_reg (rtx mem, rtx *addr)
+pass_shorten_memrefs::get_si_mem_base_reg (rtx mem, rtx *addr, bool *extend)
 {
+  /* Whether it's sign/zero extended.  */
+  if (GET_CODE (mem) == ZERO_EXTEND || GET_CODE (mem) == SIGN_EXTEND)
+    {
+      *extend = true;
+      mem = XEXP (mem, 0);
+    }
+
   if (!MEM_P (mem) || GET_MODE (mem) != SImode)
     return false;
   *addr = XEXP (mem, 0);
@@ -110,7 +117,8 @@ pass_shorten_memrefs::analyze (basic_block bb)
        {
          rtx mem = XEXP (pat, i);
          rtx addr;
-         if (get_si_mem_base_reg (mem, &addr))
+         bool extend = false;
+         if (get_si_mem_base_reg (mem, &addr, &extend))
            {
              HOST_WIDE_INT regno = REGNO (XEXP (addr, 0));
              /* Do not count store zero as these cannot be compressed.  */
@@ -150,7 +158,8 @@ pass_shorten_memrefs::transform (regno_map *m, basic_block bb)
        {
          rtx mem = XEXP (pat, i);
          rtx addr;
-         if (get_si_mem_base_reg (mem, &addr))
+         bool extend = false;
+         if (get_si_mem_base_reg (mem, &addr, &extend))
            {
              HOST_WIDE_INT regno = REGNO (XEXP (addr, 0));
              /* Do not transform store zero as these cannot be compressed.  */
@@ -161,9 +170,20 @@ pass_shorten_memrefs::transform (regno_map *m, basic_block bb)
                }
              if (m->get_or_insert (regno) > 3)
                {
-                 addr
-                   = targetm.legitimize_address (addr, addr, GET_MODE (mem));
-                 XEXP (pat, i) = replace_equiv_address (mem, addr);
+                 if (extend)
+                   {
+                     addr
+                       = targetm.legitimize_address (addr, addr,
+                                                     GET_MODE (XEXP (mem, 0)));
+                     XEXP (XEXP (pat, i), 0)
+                       = replace_equiv_address (XEXP (mem, 0), addr);
+                   }
+                 else
+                   {
+                     addr = targetm.legitimize_address (addr, addr,
+                                                        GET_MODE (mem));
+                     XEXP (pat, i) = replace_equiv_address (mem, addr);
+                   }
                  df_insn_rescan (insn);
                }
            }
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 7d274596ba3..fffd0814eee 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -1524,6 +1524,28 @@ riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
 bool
 riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
 {
+  /* Expand 
+       (set (reg:QI target) (mem:QI (address))) 
+     to
+       (set (reg:DI temp) (zero_extend:DI (mem:QI (address))))
+       (set (reg:QI target) (subreg:QI (reg:DI temp) 0))
+     with auto-sign/zero extend.  */
+  if (GET_MODE_CLASS (mode) == MODE_INT
+      && GET_MODE_SIZE (mode) < UNITS_PER_WORD
+      && can_create_pseudo_p ()
+      && MEM_P (src))
+    {
+      rtx temp_reg;
+      int zero_extend_p;
+
+      temp_reg = gen_reg_rtx (word_mode);
+      zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND);
+      emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode, 
+                                 zero_extend_p));
+      riscv_emit_move (dest, gen_lowpart (mode, temp_reg));
+      return true;
+    }
+
   if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
     {
       rtx reg;
-- 
2.17.1

Reply via email to