For very small loops (<= 6 insns), it is beneficial to unroll 4
times to make better use of the cache line.  For example, loops like:
 `while (i) a[--i] = NULL;   while (p < e)  *d++ = *p++;`

Very complex loops, on the other hand, may be hurt by unrolling
(more branch misses or cache misses).  For example, the loop below
contains calls, early exits and branches:
```
  for (int i = 0; i < n; i++) {
              int e = a[i];
             ....
              if (function_call(e))  break;
             ....
  }
```

This patch enhances RTL unrolling for small loops and prevents
complex loops from being unrolled.

gcc/ChangeLog
2020-07-03  Jiufu Guo  <guoji...@linux.ibm.com>

        * config/rs6000/rs6000.c (rs6000_loop_unroll_adjust): Refine hook.
        (rs6000_complex_loop_p): New function.
        (num_loop_calls): New function.
---
 gcc/config/rs6000/rs6000.c | 46 +++++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 58f5d780603..a4874fa0efc 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5130,22 +5130,56 @@ rs6000_destroy_cost_data (void *data)
   free (data);
 }
 
+/* Count the number of call insns in LOOP.  */
+static unsigned int
+num_loop_calls (struct loop *loop)
+{
+  basic_block *bbs;
+  rtx_insn *insn;
+  unsigned int i;
+  unsigned int call_ins_num = 0;
+
+  bbs = get_loop_body (loop);
+  for (i = 0; i < loop->num_nodes; i++)
+    FOR_BB_INSNS (bbs[i], insn)
+      if (CALL_P (insn))
+       call_ins_num++;
+
+  free (bbs);
+
+  return call_ins_num;
+}
+
+/* Return true if LOOP is too complex to be unrolled.  */
+static bool
+rs6000_complex_loop_p (struct loop *loop)
+{
+  unsigned call_num;
+
+  return loop->ninsns > 10
+    && (call_num = num_loop_calls (loop)) > 0
+    && (call_num + num_loop_branches (loop)) * 5 > loop->ninsns
+    && !single_exit (loop);
+}
+
 /* Implement targetm.loop_unroll_adjust.  */
 
 static unsigned
 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
 {
-   if (unroll_only_small_loops)
+  if (unroll_only_small_loops)
     {
-      /* TODO: This is hardcoded to 10 right now.  It can be refined, for
-        example we may want to unroll very small loops more times (4 perhaps).
-        We also should use a PARAM for this.  */
+      if (loop->ninsns <= 6)
+       return MIN (4, nunroll);
       if (loop->ninsns <= 10)
        return MIN (2, nunroll);
-      else
-       return 0;
+
+      return 0;
     }
 
+  if (rs6000_complex_loop_p (loop))
+    return 0;
+
   return nunroll;
 }
 
-- 
2.25.1

Reply via email to