2019-07-03  Joern Rennecke  <joern.renne...@riscy-ip.com>

        * postreload.c: Include rtl-iter.h.
        (reg_addr_use_luid, reg_addr_use_insn): New static variables.
        (reg_addr_use, move2add_last_jump_luid): Likewise.
        (move2add_use_add2_insn): Try to use a POST_MODIFY before an add.
        If validating the add fails, fall back to trying a strict_low_part
        move or an xor.
        (reload_cse_move2add): Keep the new static variables up to date.

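To make the first change easier to review, here is a rough sketch of the
POST_MODIFY transformation; the mode, register numbers and constants are
made up for illustration, and a target with HAVE_POST_MODIFY_DISP is
assumed.  If r1 is known to hold 100, its last recorded use is as a plain
register-indirect address, it is neither set nor used between that address
use and the constant load, and no jump intervenes, then

  (set (reg:SI 2) (mem:SI (reg:SI 1)))
  ...
  (set (reg:SI 1) (const_int 108))

can become

  (set (reg:SI 2) (mem:SI (post_modify:SI (reg:SI 1)
                            (plus:SI (reg:SI 1) (const_int 8)))))
  ...
  (set (reg:SI 1) (reg:SI 1))

i.e. the difference of 8 is folded into the addressing mode of the earlier
memory access, and the constant load is left behind as a no-op move for a
later pass to delete.  The jump check guards against control branching away
between the address use and the load, which would leave the register
modified on a path that still expects its old value.
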
Index: postreload.c
===================================================================
--- postreload.c        (revision 272931)
+++ postreload.c        (working copy)
@@ -40,6 +40,7 @@ Software Foundation; either version 3, o
 #include "cselib.h"
 #include "tree-pass.h"
 #include "dbgcnt.h"
+#include "rtl-iter.h"
 
 static int reload_cse_noop_set_p (rtx);
 static bool reload_cse_simplify (rtx_insn *, rtx);
@@ -1646,6 +1647,12 @@ reload_combine_note_use (rtx *xp, rtx_in
 static rtx reg_symbol_ref[FIRST_PSEUDO_REGISTER];
 static machine_mode reg_mode[FIRST_PSEUDO_REGISTER];
 
+/* Note when and where we last saw this register used as a plain
+   register-indirect address.  */
+static int reg_addr_use_luid[FIRST_PSEUDO_REGISTER];
+static rtx_insn *reg_addr_use_insn[FIRST_PSEUDO_REGISTER];
+static rtx *reg_addr_use[FIRST_PSEUDO_REGISTER];
+
 /* move2add_luid is linearly increased while scanning the instructions
    from first to last.  It is used to set reg_set_luid in
    reload_cse_move2add and move2add_note_store.  */
@@ -1654,6 +1661,7 @@ reload_combine_note_use (rtx *xp, rtx_in
 /* move2add_last_label_luid is set whenever a label is found.  Labels
    invalidate all previously collected reg_offset data.  */
 static int move2add_last_label_luid;
+static int move2add_last_jump_luid;
 
 /* ??? We don't know how zero / sign extension is handled, hence we
    can't go from a narrower to a wider mode.  */
@@ -1768,6 +1776,18 @@ move2add_use_add2_insn (scalar_int_mode
       if (INTVAL (off) == reg_offset [regno])
        changed = validate_change (insn, &SET_SRC (pat), reg, 0);
     }
+  else if (HAVE_POST_MODIFY_DISP
+          && reg_addr_use_luid[regno] > reg_set_luid[regno]
+          && reg_addr_use_luid[regno] > move2add_last_jump_luid
+          && !reg_used_between_p (reg, reg_addr_use_insn[regno], insn)
+          && validate_change
+               (reg_addr_use_insn[regno], reg_addr_use[regno], 
+                gen_rtx_POST_MODIFY (mode, reg,
+                                     gen_rtx_PLUS (mode, reg, new_src)),
+                1)
+          && validate_change (insn, &SET_SRC (pat), reg, 1)
+          && apply_change_group ())
+    changed = true;
   else
     {
       struct full_rtx_costs oldcst, newcst;
@@ -1779,8 +1799,9 @@ move2add_use_add2_insn (scalar_int_mode
       SET_SRC (pat) = src;
 
       if (costs_lt_p (&newcst, &oldcst, speed)
-         && have_add2_insn (reg, new_src))
-       changed = validate_change (insn, &SET_SRC (pat), tem, 0);       
+         && have_add2_insn (reg, new_src)
+         && validate_change (insn, &SET_SRC (pat), tem, 0))
+       changed = true;
       else if (sym == NULL_RTX && mode != BImode)
        {
          scalar_int_mode narrow_mode;
@@ -1807,6 +1828,19 @@ move2add_use_add2_insn (scalar_int_mode
                    }
                }
            }
+         /* Some processors clobber some flags for add (hence it won't match
+            above), but none (that the compiler models) for xor.  */
+         if (!changed)
+           {
+             new_src = gen_int_mode (UINTVAL (off) ^ reg_offset[regno], mode);
+             tem = gen_rtx_XOR (mode, reg, new_src);
+             SET_SRC (pat) = tem;
+             get_full_set_rtx_cost (pat, &newcst);
+             SET_SRC (pat) = src;
+             if (costs_lt_p (&newcst, &oldcst, speed)
+                 && validate_change (insn, &SET_SRC (pat), tem, 0))
+               changed = true;
+           }
        }
     }
   move2add_record_sym_value (reg, sym, off);
@@ -1908,6 +1942,7 @@ reload_cse_move2add (rtx_insn *first)
   for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
     {
       reg_set_luid[i] = 0;
+      reg_addr_use_luid[i] = 0;
       reg_offset[i] = 0;
       reg_base_reg[i] = 0;
       reg_symbol_ref[i] = NULL_RTX;
@@ -1915,6 +1950,7 @@ reload_cse_move2add (rtx_insn *first)
     }
 
   move2add_last_label_luid = 0;
+  move2add_last_jump_luid = 0;
   move2add_luid = 2;
   for (insn = first; insn; insn = NEXT_INSN (insn), move2add_luid++)
     {
@@ -2104,8 +2140,29 @@ reload_cse_move2add (rtx_insn *first)
                }
            }
        }
+
+      if (HAVE_POST_MODIFY_DISP)
+       {
+         subrtx_var_iterator::array_type array;
+         FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
+           if (MEM_P (*iter))
+             {
+               rtx addr = XEXP (*iter, 0);
+               if (REG_P (addr))
+                 {
+                   int regno = REGNO (addr);
+                   reg_addr_use_luid[regno] = move2add_luid;
+                   reg_addr_use_insn[regno] = insn;
+                   reg_addr_use[regno] = &XEXP (*iter, 0);
+                 }
+             }
+       }
+
       note_stores (PATTERN (insn), move2add_note_store, insn);
 
+      if (JUMP_P (insn))
+       move2add_last_jump_luid = move2add_luid;
+
       /* If INSN is a conditional branch, we try to extract an
         implicit set out of it.  */
       if (any_condjump_p (insn))

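To illustrate the new xor fallback at the end of move2add_use_add2_insn
(register number and constants again made up for illustration): if the
add pattern is rejected -- e.g. because it clobbers the flags -- and r1
is already known to hold 0x1234, then

  (set (reg:SI 1) (const_int 0x1235))

can be rewritten as

  (set (reg:SI 1) (xor:SI (reg:SI 1) (const_int 1)))

because reg ^ (off ^ reg_offset) == off whenever reg holds reg_offset;
the xor is only used when its rtx cost is below that of the original
constant load.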