Hi,

This patch introduces a new option, -fopt-info-oacc.

When the option is used as shown below on kernels-loop.c, which contains a kernels region that parloops does not manage to parallelize:
...
$ gcc kernels-loop.c -S -O2 -fopenacc -fopt-info-oacc-all
...

we get a message:
...
kernels-loop.c:23:9: note: Kernels region executed sequentially.  Consider mapping it to host execution, to avoid data copy penalty.
...

Any comments?

Thanks,
- Tom
Add fopt-info-oacc

---
 gcc/dumpfile.c |  1 +
 gcc/dumpfile.h |  5 +++--
 gcc/omp-low.c  | 30 +++++++++++++++++++++++++++++-
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/gcc/dumpfile.c b/gcc/dumpfile.c
index 144e371..e8aa0e1 100644
--- a/gcc/dumpfile.c
+++ b/gcc/dumpfile.c
@@ -137,6 +137,7 @@ static const struct dump_option_value_info optgroup_options[] =
   {"loop", OPTGROUP_LOOP},
   {"inline", OPTGROUP_INLINE},
   {"vec", OPTGROUP_VEC},
+  {"oacc", OPTGROUP_OACC},
   {"optall", OPTGROUP_ALL},
   {NULL, 0}
 };
diff --git a/gcc/dumpfile.h b/gcc/dumpfile.h
index c168cbf..6e1c657 100644
--- a/gcc/dumpfile.h
+++ b/gcc/dumpfile.h
@@ -97,9 +97,10 @@ enum tree_dump_index
 #define OPTGROUP_LOOP        (1 << 2)   /* Loop optimization passes */
 #define OPTGROUP_INLINE      (1 << 3)   /* Inlining passes */
 #define OPTGROUP_VEC         (1 << 4)   /* Vectorization passes */
-#define OPTGROUP_OTHER       (1 << 5)   /* All other passes */
+#define OPTGROUP_OACC        (1 << 5)   /* OpenACC passes */
+#define OPTGROUP_OTHER       (1 << 6)   /* All other passes */
 #define OPTGROUP_ALL	     (OPTGROUP_IPA | OPTGROUP_LOOP | OPTGROUP_INLINE \
-                              | OPTGROUP_VEC | OPTGROUP_OTHER)
+                              | OPTGROUP_VEC | OPTGROUP_OACC | OPTGROUP_OTHER)
 
 /* Define a tree dump switch.  */
 struct dump_file_info
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index a6e3fe3..d5c3484 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -20139,6 +20139,34 @@ execute_oacc_device_lower ()
 	     : fn_level < 0 ? "Function is parallel offload\n"
 	     : "Function is routine level %d\n", fn_level);
 
+#if defined ACCEL_COMPILER
+  bool is_kernels = oacc_fn_attrib_kernels_p (attrs);
+  if (is_kernels)
+    {
+      bool all_one = true;
+      tree pos = TREE_VALUE (attrs);
+      for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
+	{
+	  tree tree_val = TREE_VALUE (pos);
+	  unsigned HOST_WIDE_INT val = (tree_val
+					? TREE_INT_CST_LOW (tree_val)
+					: 1);
+	  if (val != 1)
+	    {
+	      all_one = false;
+	      break;
+	    }
+	  pos = TREE_CHAIN (pos);
+	}
+
+      if (all_one)
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, cfun->function_start_locus,
+			 "Kernels region executed sequentially.  Consider"
+			 " mapping it to host execution, to avoid data copy"
+			 " penalty.\n");
+    }
+#endif
+
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   unsigned used_mask = oacc_loop_partition (loops, outer_mask);
   int dims[GOMP_DIM_MAX];
@@ -20312,7 +20340,7 @@ const pass_data pass_data_oacc_device_lower =
 {
   GIMPLE_PASS, /* type */
   "oaccdevlow", /* name */
-  OPTGROUP_NONE, /* optinfo_flags */
+  OPTGROUP_OACC, /* optinfo_flags */
   TV_NONE, /* tv_id */
   PROP_cfg, /* properties_required */
   0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */

Reply via email to