Hi

This patch is trying to fix PR88398 for AArch64. As discussed in the PR, 
loop unrolling is probably what we can do here. As an easy fix, the 
existing unroll_stupid is unrolling the given example better than the 
unroll_runtime_iterations since the the loop contains a break inside it.

So all I have done here is:
1) Add a target hook so that this is AArch64 specific.
2) We are not unrolling the loops that decide_unroll_runtime_iterations
would reject.
3) Out of the ones that decide_unroll_runtime_iterations would accept, 
check if the loop has more than 1 exit (this is done in the new target 
hook) and if it does, try to unroll using unroll_stupid.

Regression tested on AArch64 and added the test from the PR. This gives 
an overall code size reduction of 2.35% and performance gain of 0.498% 
on Spec2017 Intrate.

Is this ok for trunk?

Thanks
Sudi

gcc/ChangeLog:

2019-xx-xx  Sudakshina Das  <sudi....@arm.com>

        PR88398
        * cfgloop.h: Include target.h.
        (lpt_dec): Move to...
        * target.h (lpt_dec): ... Here.
        * target.def: Define TARGET_LOOP_DECISION_ADJUST.
        * loop-unroll.c (decide_unroll_runtime_iterations): Use new target hook.
        (decide_unroll_stupid): Likewise.
        * config/aarch64/aarch64.c (aarch64_loop_decision_adjust): New function.
        (TARGET_LOOP_DECISION_ADJUST): Define for AArch64.
        * doc/tm.texi: Regenerated.
        * doc/tm.texi.in: Document TARGET_LOOP_DECISION_ADJUST.

gcc/testsuite/ChangeLog:

2019-xx-xx  Sudakshina Das  <sudi....@arm.com>

        PR88398
        * gcc.target/aarch64/pr88398.c: New test.
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 0b0154ffd7bf031a005de993b101d9db6dd98c43..985c74e3b60728fc8c9d34b69634488cae3451cb 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -21,15 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #define GCC_CFGLOOP_H
 
 #include "cfgloopmanip.h"
-
-/* Structure to hold decision about unrolling/peeling.  */
-enum lpt_dec
-{
-  LPT_NONE,
-  LPT_UNROLL_CONSTANT,
-  LPT_UNROLL_RUNTIME,
-  LPT_UNROLL_STUPID
-};
+#include "target.h"
 
 struct GTY (()) lpt_decision {
   enum lpt_dec decision;
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 599d07a729e7438080f8b5240ee95037a49fb983..f31ac41d66257c01ead8d5f5b9b22379ecb5d276 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -21093,6 +21093,39 @@ aarch64_sched_can_speculate_insn (rtx_insn *insn)
     }
 }
 
+/* Implement TARGET_LOOP_DECISION_ADJUST.  CONSIDER is the loop decision
+   currently being checked for loop LOOP.  This returns a decision which could
+   either be LPT_UNROLL_STUPID or the current value in LOOP.  */
+static enum lpt_dec
+aarch64_loop_decision_adjust (enum lpt_dec consider, class loop *loop)
+{
+  switch (consider)
+    {
+    case LPT_UNROLL_CONSTANT:
+      return loop->lpt_decision.decision;
+
+    case LPT_UNROLL_RUNTIME:
+    /* Fall through.  */
+    case LPT_UNROLL_STUPID:
+      {
+	vec<edge> edges = get_loop_exit_edges (loop);
+	if (edges.length () > 1)
+	  {
+	    if (dump_file)
+	      fprintf (dump_file, ";; Need change in loop decision\n");
+	    consider = LPT_UNROLL_STUPID;
+	    return consider;
+	  }
+	return loop->lpt_decision.decision;
+      }
+
+    case LPT_NONE:
+    /* Fall through.  */
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Implement TARGET_COMPUTE_PRESSURE_CLASSES.  */
 
 static int
@@ -21839,6 +21872,9 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_CAN_USE_DOLOOP_P
 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
 
+#undef TARGET_LOOP_DECISION_ADJUST
+#define TARGET_LOOP_DECISION_ADJUST aarch64_loop_decision_adjust
+
 #undef TARGET_SCHED_ADJUST_PRIORITY
 #define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index cd9aed9874f4e6b2b0e2f8956ed6155975e643a8..61bd00e84c8a2a8865e95ba579c3b94790ab1331 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11857,6 +11857,15 @@ is required only when the target has special constraints like maximum
 number of memory accesses.
 @end deftypefn
 
+@deftypefn {Target Hook} {enum lpt_dec} TARGET_LOOP_DECISION_ADJUST (enum lpt_dec @var{consider}, class loop *@var{loop})
+This target hook returns either a new value for the loop unrolling
+decision or the existing value in @var{loop}. The parameter @var{consider}
+is the loop decision currently being tested. The parameter @var{loop} is a
+pointer to the loop, which is going to be checked for unrolling. This target
+hook is required only when the target wants to override the unrolling
+decisions.
+@end deftypefn
+
 @defmac POWI_MAX_MULTS
 If defined, this macro is interpreted as a signed integer C expression
 that specifies the maximum number of floating point multiplications
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 2739e9ceec5ad7253ff9135da8dbe3bf6010e8d7..7a7f917fb45a6cc22f373ff16f8b78aa3e35f210 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8026,6 +8026,8 @@ build_type_attribute_variant (@var{mdecl},
 
 @hook TARGET_LOOP_UNROLL_ADJUST
 
+@hook TARGET_LOOP_DECISION_ADJUST
+
 @defmac POWI_MAX_MULTS
 If defined, this macro is interpreted as a signed integer C expression
 that specifies the maximum number of floating point multiplications
diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c
index 63fccd23fae38f8918a7d94411aaa43c72830dd3..d8f09d445764f74b8142ff3996cd1db229464202 100644
--- a/gcc/loop-unroll.c
+++ b/gcc/loop-unroll.c
@@ -672,6 +672,7 @@ decide_unroll_runtime_iterations (class loop *loop, int flags)
   unsigned nunroll, nunroll_by_av, i;
   class niter_desc *desc;
   widest_int iterations;
+  enum lpt_dec decision = LPT_UNROLL_RUNTIME;
 
   /* If we were not asked to unroll this loop, just return back silently.  */
   if (!(flags & UAP_UNROLL) && !loop->unroll)
@@ -735,6 +736,16 @@ decide_unroll_runtime_iterations (class loop *loop, int flags)
       return;
     }
 
+  if (targetm.loop_decision_adjust)
+    decision = targetm.loop_decision_adjust (decision, loop);
+
+  if (decision != loop->lpt_decision.decision)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; Not unrolling loop, there maybe a better decision\n");
+      return;
+    }
+
   /* Success; now force nunroll to be power of 2, as code-gen
      requires it, we are unable to cope with overflows in
      computation of number of iterations.  */
@@ -1157,9 +1168,16 @@ decide_unroll_stupid (class loop *loop, int flags)
   unsigned nunroll, nunroll_by_av, i;
   class niter_desc *desc;
   widest_int iterations;
+  enum lpt_dec decision = LPT_UNROLL_STUPID;
 
-  /* If we were not asked to unroll this loop, just return back silently.  */
-  if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
+  if (targetm.loop_decision_adjust)
+    decision = targetm.loop_decision_adjust (decision, loop);
+
+  /* If we were not asked to unroll this loop and the target also does not
+     tell us otherwise, just return back silently.  */
+  if (!(flags & UAP_UNROLL_ALL)
+      && !loop->unroll
+      && decision == loop->lpt_decision.decision)
     return;
 
   if (dump_enabled_p ())
@@ -1189,26 +1207,41 @@ decide_unroll_stupid (class loop *loop, int flags)
       return;
     }
 
-  /* Check for simple loops.  */
   desc = get_simple_loop_desc (loop);
 
-  /* Check simpleness.  */
-  if (desc->simple_p && !desc->assumptions)
+  /* Skip/add a few checks if the target changed the decision.  */
+  if (decision == loop->lpt_decision.decision)
     {
-      if (dump_file)
-	fprintf (dump_file, ";; Loop is simple\n");
-      return;
-    }
+      /* Check simpleness.  */
+      if (desc->simple_p && !desc->assumptions)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, ";; Loop is simple\n");
+	  return;
+	}
 
-  /* Do not unroll loops with branches inside -- it increases number
-     of mispredicts. 
-     TODO: this heuristic needs tunning; call inside the loop body
-     is also relatively good reason to not unroll.  */
-  if (num_loop_branches (loop) > 1)
+      /* Do not unroll loops with branches inside -- it increases number
+	 of mispredicts.
+	 TODO: this heuristic needs tunning; call inside the loop body
+	 is also relatively good reason to not unroll.  */
+      if (num_loop_branches (loop) > 1)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, ";; Not unrolling, contains branches\n");
+	  return;
+	}
+    }
+  else
     {
-      if (dump_file)
-	fprintf (dump_file, ";; Not unrolling, contains branches\n");
-      return;
+      /* For target effected loops, adding the simpleness check from
+	 decide_unroll_runtime_iterations since we do not want to add more
+	 unrolling than before.  */
+      if (!desc->simple_p && desc->assumptions)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, ";; Not unrolling, loop is not simple\n");
+	  return;
+	}
     }
 
   /* Check whether the loop rolls.  */
diff --git a/gcc/target.def b/gcc/target.def
index 8e83c2c7a7136511c07a5bc9e18876c91a38b955..d869495f9c8a9b9187a802298e28aeb334a94cfb 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2680,6 +2680,18 @@ number of memory accesses.",
  unsigned, (unsigned nunroll, class loop *loop),
  NULL)
 
+/* Return a new value for loop unroll decision.  */
+DEFHOOK
+(loop_decision_adjust,
+ "This target hook returns either a new value for the loop unrolling\n\
+decision or the existing value in @var{loop}. The parameter @var{consider}\n\
+is the loop decision currently being tested. The parameter @var{loop} is a\n\
+pointer to the loop, which is going to be checked for unrolling. This target\n\
+hook is required only when the target wants to override the unrolling\n\
+decisions.",
+ enum lpt_dec, (enum lpt_dec consider, class loop *loop),
+ NULL)
+
 /* True if X is a legitimate MODE-mode immediate operand.  */
 DEFHOOK
 (legitimate_constant_p,
diff --git a/gcc/target.h b/gcc/target.h
index 843c3d7887f8b49edfe9e45abf9de760176896e4..f67084a449a145f3c8016df8b9230ae835eef363 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -140,6 +140,16 @@ struct ddg;
 /* This is defined in cfgloop.h .  */
 class loop;
 
+/* Structure to hold decision about unrolling/peeling.  This is used in
+   targetm.loop_unroll_adjust and in class loop.  */
+enum lpt_dec
+{
+  LPT_NONE,
+  LPT_UNROLL_CONSTANT,
+  LPT_UNROLL_RUNTIME,
+  LPT_UNROLL_STUPID
+};
+
 /* This is defined in ifcvt.h.  */
 struct noce_if_info;
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr88398.c b/gcc/testsuite/gcc.target/aarch64/pr88398.c
new file mode 100644
index 0000000000000000000000000000000000000000..ad1929a4e81df92972418ab5c7ed2a2e6db4f157
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr88398.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -funroll-loops -fdump-rtl-loop2_unroll-all" } */
+
+typedef unsigned char __uint8_t;
+typedef unsigned int __uint32_t;
+typedef unsigned long long __uint64_t;
+
+int
+foo (const __uint64_t len_limit, const __uint8_t *cur,
+             __uint32_t delta, int len) {
+
+  const __uint8_t *pb = cur - delta;
+
+  while (++len != len_limit) {
+    if (pb[len] != cur[len])
+      break;
+  }
+
+  return len;
+}
+
+/* { dg-final { scan-rtl-dump "considering unrolling loop stupidly" "loop2_unroll" } } */
+/* { dg-final { scan-rtl-dump "optimized: loop unrolled 7 times" "loop2_unroll" } } */

Reply via email to