On 13/11/15 12:39, Jakub Jelinek wrote:
On Fri, Nov 13, 2015 at 12:29:51PM +0100, Richard Biener wrote:
thanks for the explanation. Filed as PR68331 - '[meta-bug] fipa-pta issues'.

Any feedback on the '#pragma GCC offload-alias=<none|pointer|all>' bit above?
Is that sort of what you had in mind?

Yes.  Whether that makes sense is another question of course.  You can
annotate memory references with MR_DEPENDENCE_BASE/CLIQUE yourself
as well if you know dependences without the users intervention.

I really don't like even the GCC offload-alias, I just don't see anything
special on the offload code.  Not to mention that the same issue is already
with other outlined functions, like OpenMP tasks or parallel regions, those
aren't offloaded, yet they can suffer from worse alias/points-to analysis
too.

AFAIU there is one aspect that is different for offloaded code: the setup of the data on the device.

Consider this example:
...
unsigned int a[N];
unsigned int b[N];
unsigned int c[N];

int
main (void)
{
  ...

#pragma acc kernels copyin (a) copyin (b) copyout (c)
  {
    for (COUNTERTYPE ii = 0; ii < N; ii++)
      c[ii] = a[ii] + b[ii];
  }

  ...
...

At gimple level, we have:
...
#pragma omp target oacc_kernels \
  map(force_from:c [len: 2097152]) \
  map(force_to:b [len: 2097152]) \
  map(force_to:a [len: 2097152])
...

[ The meaning of the force_from/force_to mappings is given in include/gomp-constants.h:
...
    /* Allocate.  */
    GOMP_MAP_FORCE_ALLOC = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_ALLOC),
    /* ..., and copy to device.  */
    GOMP_MAP_FORCE_TO = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TO),
    /* ..., and copy from device.  */
    GOMP_MAP_FORCE_FROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_FROM),
    /* ..., and copy to and from device.  */
    GOMP_MAP_FORCE_TOFROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TOFROM),
...  ]

So before calling the offloaded function, a separate alloc is done for a, b and c, and the base pointers of the newly allocated objects are passed to the offloaded function.

This means we can mark those base pointers as restrict in the offloaded function.

Attached proof-of-concept patch implements that.

We simply have some compiler internal interface between the caller and
callee of the outlined regions, each interface in between those has
its own structure type used to communicate the info;
we can attach attributes on the fields, or some flags to indicate some
properties interesting from aliasing POV.
We don't really need to perform
full IPA-PTA, perhaps it would be enough to a) record somewhere in cgraph
the relationship in between such callers and callees (for offloading regions
we already have "omp target entrypoint" attribute on the callee and a
singler caller), tell LTO if possible not to split those into different
partitions if easily possible, and then just for these pairs perform
aliasing/points-to analysis in the caller and the result record using
cliques/special attributes/whatever to the callee side, so that the callee
(outlined OpenMP/OpenACC/Cilk+ region) can then improve its alias analysis.

As a start, is the approach of this patch OK?

It will allow us to commit the oacc kernels patch series with the ability to parallelize non-trivial testcases, and work on improving the alias bit after that.

Thanks,
- Tom



Mark pointers to allocated target vars as restricted, if possible

---
 gcc/omp-low.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 62 insertions(+), 5 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 268b67b..0ce822d 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -1372,7 +1372,8 @@ build_sender_ref (tree var, omp_context *ctx)
 /* Add a new field for VAR inside the structure CTX->SENDER_DECL.  */
 
 static void
-install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
+install_var_field_1 (tree var, bool by_ref, int mask, omp_context *ctx,
+		     bool base_pointers_restrict)
 {
   tree field, type, sfield = NULL_TREE;
   splay_tree_key key = (splay_tree_key) var;
@@ -1396,7 +1397,11 @@ install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
       type = build_pointer_type (build_pointer_type (type));
     }
   else if (by_ref)
-    type = build_pointer_type (type);
+    {
+      type = build_pointer_type (type);
+      if (base_pointers_restrict)
+	type = build_qualified_type (type, TYPE_QUAL_RESTRICT);
+    }
   else if ((mask & 3) == 1 && is_reference (var))
     type = TREE_TYPE (type);
 
@@ -1460,6 +1465,12 @@ install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
     splay_tree_insert (ctx->sfield_map, key, (splay_tree_value) sfield);
 }
 
+static void
+install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
+{
+  install_var_field_1 (var, by_ref, mask, ctx, false);
+}
+
 static tree
 install_var_local (tree var, omp_context *ctx)
 {
@@ -1816,7 +1827,8 @@ fixup_child_record_type (omp_context *ctx)
    specified by CLAUSES.  */
 
 static void
-scan_sharing_clauses (tree clauses, omp_context *ctx)
+scan_sharing_clauses_1 (tree clauses, omp_context *ctx,
+			bool base_pointers_restrict)
 {
   tree c, decl;
   bool scan_array_reductions = false;
@@ -2073,7 +2085,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 		      && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
 		    install_var_field (decl, true, 7, ctx);
 		  else
-		    install_var_field (decl, true, 3, ctx);
+		    install_var_field_1 (decl, true, 3, ctx, base_pointers_restrict);
 		  if (is_gimple_omp_offloaded (ctx->stmt))
 		    install_var_local (decl, ctx);
 		}
@@ -2339,6 +2351,12 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 	scan_omp (&OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c), ctx);
 }
 
+static void
+scan_sharing_clauses (tree clauses, omp_context *ctx)
+{
+  scan_sharing_clauses_1 (clauses, ctx, false);
+}
+
 /* Create a new name for omp child function.  Returns an identifier.  If
    IS_CILK_FOR is true then the suffix for the child function is
    "_cilk_for_fn."  */
@@ -3056,13 +3074,52 @@ scan_omp_target (gomp_target *stmt, omp_context *outer_ctx)
   DECL_NAMELESS (name) = 1;
   TYPE_NAME (ctx->record_type) = name;
   TYPE_ARTIFICIAL (ctx->record_type) = 1;
+
+  bool base_pointers_restrict = false;
   if (offloaded)
     {
       create_omp_child_function (ctx, false);
       gimple_omp_target_set_child_fn (stmt, ctx->cb.dst_fn);
+
+      /* If all the clauses force allocation, we can be certain that the objects
+	 on the target are disjoint, and therefore mark the base pointers as
+	 restrict.  */
+      base_pointers_restrict = true;
+      tree c;
+      for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+	{
+	  switch (OMP_CLAUSE_CODE (c))
+	    {
+	    case OMP_CLAUSE_MAP:
+	      switch (OMP_CLAUSE_MAP_KIND (c))
+		{
+		case GOMP_MAP_ALLOC:
+		case GOMP_MAP_FORCE_TO:
+		case GOMP_MAP_FORCE_FROM:
+		case GOMP_MAP_FORCE_TOFROM:
+		  break;
+		default:
+		  base_pointers_restrict = false;
+		  break;
+		}
+	      break;
+
+	    default:
+	      base_pointers_restrict = false;
+	      break;
+	    }
+
+	  if (!base_pointers_restrict)
+	    break;
+	}
+      if (base_pointers_restrict)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Base pointers in offloaded function are restrict\n");
+	}
     }
 
-  scan_sharing_clauses (clauses, ctx);
+  scan_sharing_clauses_1 (clauses, ctx, base_pointers_restrict);
   scan_omp (gimple_omp_body_ptr (stmt), ctx);
 
   if (TYPE_FIELDS (ctx->record_type) == NULL)

Reply via email to