Hi!

This fixes #pragma omp ordered threads simd expansion.
In that case we want GOMP_ordered_start () / GOMP_ordered_end () calls
around the block, but those calls really should be done just once, while
the other stuff in between GOMP_SIMD_ORDERED_{START,END} internal calls
should be expanded in a loop from 0 to vf-1 with the iterator and linear
vars being adjusted there.  Therefore, I'm emitting those calls neither in
between the internal calls (that region is what should eventually become a
loop) nor outside of them (because everything there should be vectorized).
Instead, whether the threads clause is present is passed as an argument to
the internal calls (for now this is not a big difference, as the vectorizer
always gives up on this, but we should teach it to handle that case
eventually).

Regtested on x86_64-linux, committed to gomp-4_5-branch.

2015-11-13  Jakub Jelinek  <ja...@redhat.com>

        * omp-low.c (lower_omp_ordered): Add argument to GOMP_SIMD_ORDERED_*
        internal calls - 0 if ordered simd and 1 for ordered threads simd.
        * tree-vectorizer.c (adjust_simduid_builtins): If GOMP_SIMD_ORDERED_*
        argument is 1, replace it with GOMP_ordered_* call instead of removing
        it.

        * testsuite/libgomp.c/ordered-5.c: New test.

--- gcc/omp-low.c.jj    2015-11-09 11:17:31.000000000 +0100
+++ gcc/omp-low.c       2015-11-13 17:20:18.701832932 +0100
@@ -13924,8 +13924,10 @@ lower_omp_ordered (gimple_stmt_iterator
   gomp_ordered *ord_stmt = as_a <gomp_ordered *> (stmt);
   gcall *x;
   gbind *bind;
-  bool simd
-    = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), OMP_CLAUSE_SIMD);
+  bool simd = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
+                              OMP_CLAUSE_SIMD);
+  bool threads = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
+                                 OMP_CLAUSE_THREADS);
 
   if (find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
                       OMP_CLAUSE_DEPEND))
@@ -13948,7 +13950,8 @@ lower_omp_ordered (gimple_stmt_iterator
 
   if (simd)
     {
-      x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_START, 0);
+      x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_START, 1,
+                                     build_int_cst (NULL_TREE, threads));
       cfun->has_simduid_loops = true;
     }
   else
@@ -13962,7 +13965,8 @@ lower_omp_ordered (gimple_stmt_iterator
   gimple_omp_set_body (stmt, NULL);
 
   if (simd)
-    x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_END, 0);
+    x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_END, 1,
+                                   build_int_cst (NULL_TREE, threads));
   else
     x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_END),
                           0);
--- gcc/tree-vectorizer.c.jj    2015-11-09 11:17:56.000000000 +0100
+++ gcc/tree-vectorizer.c       2015-11-13 17:51:32.793269963 +0100
@@ -177,6 +177,21 @@ adjust_simduid_builtins (hash_table<simd
              break;
            case IFN_GOMP_SIMD_ORDERED_START:
            case IFN_GOMP_SIMD_ORDERED_END:
+             if (integer_onep (gimple_call_arg (stmt, 0)))
+               {
+                 enum built_in_function bcode
+                   = (ifn == IFN_GOMP_SIMD_ORDERED_START
+                      ? BUILT_IN_GOMP_ORDERED_START
+                      : BUILT_IN_GOMP_ORDERED_END);
+                 gimple *g
+                   = gimple_build_call (builtin_decl_explicit (bcode), 0);
+                 tree vdef = gimple_vdef (stmt);
+                 gimple_set_vdef (g, vdef);
+                 SSA_NAME_DEF_STMT (vdef) = g;
+                 gimple_set_vuse (g, gimple_vuse (stmt));
+                 gsi_replace (&i, g, true);
+                 continue;
+               }
              gsi_remove (&i, true);
              unlink_stmt_vdef (stmt);
              continue;
--- libgomp/testsuite/libgomp.c/ordered-5.c.jj  2015-11-13 17:56:26.182110077 +0100
+++ libgomp/testsuite/libgomp.c/ordered-5.c     2015-11-13 17:56:51.266753981 +0100
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+extern void abort (void);
+int a[1024], b = -1;
+
+int
+main ()
+{
+  int i;
+  #pragma omp parallel for simd ordered
+  for (i = 0; i < 1024; i++)
+    {
+      a[i] = i;
+      #pragma omp ordered threads simd
+      {
+       if (b + 1 != i)
+         abort ();
+       b = i;
+      }
+      a[i] += 3;
+    }
+  if (b != 1023)
+    abort ();
+  for (i = 0; i < 1024; i++)
+    if (a[i] != i + 3)
+      abort ();
+  return 0;
+}

        Jakub

Reply via email to