Hi!

This patch fixes a problem where, after processing an omp parallel
with a shared clause on some non-addressable var (for which we decide
to use copy-in/out), we then process an omp task with a shared clause
on the same var.  For task shared vars we can never use copy-in/out,
so we have to force the var addressable, and we then ICE because
copy-in/out is not expected for addressable vars.  The runtime testcase
shows that we actually must not use copy-in/out in that case, otherwise
we introduce data races that were not present in the original source.

Fixed by deferring the layout of the record types for parallel/task regions
until all of scan_omp has been processed; just before the layout, we test
for this case and switch vars from copy-in/out to by-reference passing if
needed.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to
trunk/4.9.

2014-10-03  Jakub Jelinek  <ja...@redhat.com>

        PR libgomp/61200
        * omp-low.c (taskreg_contexts): New variable.
        (scan_omp_parallel): Push newly created context into taskreg_contexts
        vector and move record layout code to finish_taskreg_scan.
        (scan_omp_task): Likewise.
        (finish_taskreg_scan): New function.
        (execute_lower_omp): Call finish_taskreg_scan on all taskreg_contexts
        vector elements and release it.

        * c-c++-common/gomp/pr61200.c: New test.

        * testsuite/libgomp.c/pr61200.c: New test.

--- gcc/omp-low.c.jj    2014-09-29 07:20:40.000000000 +0200
+++ gcc/omp-low.c       2014-10-02 18:39:47.542770018 +0200
@@ -204,6 +204,7 @@ static int taskreg_nesting_level;
 static int target_nesting_level;
 static struct omp_region *root_omp_region;
 static bitmap task_shared_vars;
+static vec<omp_context *> taskreg_contexts;
 
 static void scan_omp (gimple_seq *, omp_context *);
 static tree scan_omp_1_op (tree *, int *, void *);
@@ -2097,6 +2098,7 @@ scan_omp_parallel (gimple_stmt_iterator
     }
 
   ctx = new_omp_context (stmt, outer_ctx);
+  taskreg_contexts.safe_push (ctx);
   if (taskreg_nesting_level > 1)
     ctx->is_nested = true;
   ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
@@ -2116,11 +2118,6 @@ scan_omp_parallel (gimple_stmt_iterator
 
   if (TYPE_FIELDS (ctx->record_type) == NULL)
     ctx->record_type = ctx->receiver_decl = NULL;
-  else
-    {
-      layout_type (ctx->record_type);
-      fixup_child_record_type (ctx);
-    }
 }
 
 /* Scan an OpenMP task directive.  */
@@ -2131,7 +2128,6 @@ scan_omp_task (gimple_stmt_iterator *gsi
   omp_context *ctx;
   tree name, t;
   gimple stmt = gsi_stmt (*gsi);
-  location_t loc = gimple_location (stmt);
 
   /* Ignore task directives with empty bodies.  */
   if (optimize > 0
@@ -2142,6 +2138,7 @@ scan_omp_task (gimple_stmt_iterator *gsi
     }
 
   ctx = new_omp_context (stmt, outer_ctx);
+  taskreg_contexts.safe_push (ctx);
   if (taskreg_nesting_level > 1)
     ctx->is_nested = true;
   ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
@@ -2179,8 +2176,71 @@ scan_omp_task (gimple_stmt_iterator *gsi
       t = build_int_cst (long_integer_type_node, 1);
       gimple_omp_task_set_arg_align (stmt, t);
     }
+}
+
+
+/* If any decls have been made addressable during scan_omp,
+   adjust their fields if needed, and layout record types
+   of parallel/task constructs.  */
+
+static void
+finish_taskreg_scan (omp_context *ctx)
+{
+  if (ctx->record_type == NULL_TREE)
+    return;
+
+  /* If any task_shared_vars were needed, verify all
+     OMP_CLAUSE_SHARED clauses on GIMPLE_OMP_{PARALLEL,TASK}
+     statements if use_pointer_for_field hasn't changed
+     because of that.  If it did, update field types now.  */
+  if (task_shared_vars)
+    {
+      tree c;
+
+      for (c = gimple_omp_taskreg_clauses (ctx->stmt);
+          c; c = OMP_CLAUSE_CHAIN (c))
+       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED)
+         {
+           tree decl = OMP_CLAUSE_DECL (c);
+
+           /* Global variables don't need to be copied,
+              the receiver side will use them directly.  */
+           if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
+             continue;
+           if (!bitmap_bit_p (task_shared_vars, DECL_UID (decl))
+               || !use_pointer_for_field (decl, ctx))
+             continue;
+           tree field = lookup_field (decl, ctx);
+           if (TREE_CODE (TREE_TYPE (field)) == POINTER_TYPE
+               && TREE_TYPE (TREE_TYPE (field)) == TREE_TYPE (decl))
+             continue;
+           TREE_TYPE (field) = build_pointer_type (TREE_TYPE (decl));
+           TREE_THIS_VOLATILE (field) = 0;
+           DECL_USER_ALIGN (field) = 0;
+           DECL_ALIGN (field) = TYPE_ALIGN (TREE_TYPE (field));
+           if (TYPE_ALIGN (ctx->record_type) < DECL_ALIGN (field))
+             TYPE_ALIGN (ctx->record_type) = DECL_ALIGN (field);
+           if (ctx->srecord_type)
+             {
+               tree sfield = lookup_sfield (decl, ctx);
+               TREE_TYPE (sfield) = TREE_TYPE (field);
+               TREE_THIS_VOLATILE (sfield) = 0;
+               DECL_USER_ALIGN (sfield) = 0;
+               DECL_ALIGN (sfield) = DECL_ALIGN (field);
+               if (TYPE_ALIGN (ctx->srecord_type) < DECL_ALIGN (sfield))
+                 TYPE_ALIGN (ctx->srecord_type) = DECL_ALIGN (sfield);
+             }
+         }
+    }
+
+  if (gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL)
+    {
+      layout_type (ctx->record_type);
+      fixup_child_record_type (ctx);
+    }
   else
     {
+      location_t loc = gimple_location (ctx->stmt);
       tree *p, vla_fields = NULL_TREE, *q = &vla_fields;
       /* Move VLA fields to the end.  */
       p = &TYPE_FIELDS (ctx->record_type);
@@ -2200,12 +2260,12 @@ scan_omp_task (gimple_stmt_iterator *gsi
       fixup_child_record_type (ctx);
       if (ctx->srecord_type)
        layout_type (ctx->srecord_type);
-      t = fold_convert_loc (loc, long_integer_type_node,
-                       TYPE_SIZE_UNIT (ctx->record_type));
-      gimple_omp_task_set_arg_size (stmt, t);
+      tree t = fold_convert_loc (loc, long_integer_type_node,
+                                TYPE_SIZE_UNIT (ctx->record_type));
+      gimple_omp_task_set_arg_size (ctx->stmt, t);
       t = build_int_cst (long_integer_type_node,
                         TYPE_ALIGN_UNIT (ctx->record_type));
-      gimple_omp_task_set_arg_align (stmt, t);
+      gimple_omp_task_set_arg_align (ctx->stmt, t);
     }
 }
 
@@ -10560,6 +10620,8 @@ static unsigned int
 execute_lower_omp (void)
 {
   gimple_seq body;
+  int i;
+  omp_context *ctx;
 
   /* This pass always runs, to provide PROP_gimple_lomp.
      But there is nothing to do unless -fopenmp is given.  */
@@ -10572,6 +10634,9 @@ execute_lower_omp (void)
   body = gimple_body (current_function_decl);
   scan_omp (&body, NULL);
   gcc_assert (taskreg_nesting_level == 0);
+  FOR_EACH_VEC_ELT (taskreg_contexts, i, ctx)
+    finish_taskreg_scan (ctx);
+  taskreg_contexts.release ();
 
   if (all_contexts->root)
     {
--- gcc/testsuite/c-c++-common/gomp/pr61200.c.jj        2014-10-02 19:06:52.561661363 +0200
+++ gcc/testsuite/c-c++-common/gomp/pr61200.c   2014-10-02 19:07:25.403053890 +0200
@@ -0,0 +1,13 @@
+/* PR libgomp/61200 */
+
+int
+main ()
+{
+  int var = 1;
+  #pragma omp parallel
+    if (var != 1)
+      __builtin_abort ();
+  #pragma omp task shared(var)
+    var = 2;
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/pr61200.c.jj    2014-10-02 19:06:04.164556406 +0200
+++ libgomp/testsuite/libgomp.c/pr61200.c       2014-10-02 19:06:19.052281461 +0200
@@ -0,0 +1,87 @@
+/* PR libgomp/61200 */
+/* { dg-do run } */
+
+#include <omp.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+volatile int x;
+
+void
+foo ()
+{
+  int var = 1;
+  int i;
+
+  for (i = 0; i < 2; i++)
+    {
+      if (i == 1)
+       {
+         #pragma omp parallel num_threads(2)
+           if (x)
+             var++;
+           else
+             {
+               #pragma omp single
+                 sleep (2);
+             }
+       }
+      else
+       {
+         #pragma omp task shared(var)
+         {
+           sleep (1);
+           var = 2;
+         }
+       }
+    }
+  #pragma omp taskwait
+  if (var != 2)
+    abort ();
+}
+
+void
+bar ()
+{
+  int var = 1;
+  int i;
+
+  for (i = 0; i < 2; i++)
+    {
+      if (i == 0)
+       {
+         #pragma omp task shared(var)
+         {
+           sleep (1);
+           var = 2;
+         }
+       }
+      else
+       {
+         #pragma omp parallel num_threads(2)
+           if (x)
+             var++;
+           else
+             {
+               #pragma omp single
+                 sleep (2);
+             }
+       }
+    }
+  #pragma omp taskwait
+  if (var != 2)
+    abort ();
+}
+
+int
+main ()
+{
+  omp_set_nested (1);
+  #pragma omp parallel num_threads(2)
+    #pragma omp single
+      foo ();
+  #pragma omp parallel num_threads(2)
+    #pragma omp single
+      bar ();
+  return 0;
+}

        Jakub

Reply via email to