Hi!

To be able to properly vectorize code that uses private, lastprivate or linear
clauses with reference arguments, we should be using "omp simd array" types
holding what those references bind to.  The following patch implements that
and, in addition, disables "omp simd array" vectorization in loops where those
reference arguments refer to variable-length types.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2019-06-05  Jakub Jelinek  <ja...@redhat.com>

        * omp-low.c (lower_rec_input_clauses): Force max_vf 1 if is_simd and
        on privatization clauses OMP_CLAUSE_DECL is privatized by reference
        and references a VLA.  Handle references to non-VLAs if is_simd on
        all privatization clauses like reductions.
        (lower_rec_input_clauses) <case do_private, case do_firstprivate>:
        If omp_is_reference, always use omp simd arrays and set
        DECL_VALUE_EXPR in that case; if lower_rec_simd_input_clauses
        fails, emit reference initialization.

        * g++.dg/vect/simd-1.cc: New test.

--- gcc/omp-low.c.jj    2019-06-03 17:57:42.060631242 +0200
+++ gcc/omp-low.c       2019-06-04 14:23:02.860608537 +0200
@@ -3831,12 +3831,24 @@ lower_rec_input_clauses (tree clauses, g
        case OMP_CLAUSE_LASTPRIVATE:
          if (is_variable_sized (OMP_CLAUSE_DECL (c)))
            sctx.max_vf = 1;
+         else if (omp_is_reference (OMP_CLAUSE_DECL (c)))
+           {
+             tree rtype = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
+             if (!TREE_CONSTANT (TYPE_SIZE_UNIT (rtype)))
+               sctx.max_vf = 1;
+           }
          break;
        case OMP_CLAUSE_REDUCTION:
        case OMP_CLAUSE_IN_REDUCTION:
          if (TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF
              || is_variable_sized (OMP_CLAUSE_DECL (c)))
            sctx.max_vf = 1;
+         else if (omp_is_reference (OMP_CLAUSE_DECL (c)))
+           {
+             tree rtype = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
+             if (!TREE_CONSTANT (TYPE_SIZE_UNIT (rtype)))
+               sctx.max_vf = 1;
+           }
          break;
        case OMP_CLAUSE_IF:
          if (integer_zerop (OMP_CLAUSE_IF_EXPR (c)))
@@ -4665,8 +4677,8 @@ lower_rec_input_clauses (tree clauses, g
                  /* For reduction in SIMD loop, defer adding the
                     initialization of the reference, because if we decide
                     to use SIMD array for it, the initilization could cause
-                    expansion ICE.  */
-                 if (c_kind == OMP_CLAUSE_REDUCTION && is_simd)
+                    expansion ICE.  Ditto for other privatization clauses.  */
+                 if (is_simd)
                    x = NULL_TREE;
                  else
                    {
@@ -4777,10 +4789,21 @@ lower_rec_input_clauses (tree clauses, g
                  tree y = lang_hooks.decls.omp_clause_dtor (c, new_var);
                  if ((TREE_ADDRESSABLE (new_var) || nx || y
                       || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
-                      || OMP_CLAUSE_CODE (c) == OMP_CLAUSE__CONDTEMP_)
+                      || OMP_CLAUSE_CODE (c) == OMP_CLAUSE__CONDTEMP_
+                      || omp_is_reference (var))
                      && lower_rec_simd_input_clauses (new_var, ctx, &sctx,
                                                       ivar, lvar))
                    {
+                     if (omp_is_reference (var))
+                       {
+                         gcc_assert (TREE_CODE (new_var) == MEM_REF);
+                         tree new_vard = TREE_OPERAND (new_var, 0);
+                         gcc_assert (DECL_P (new_vard));
+                         SET_DECL_VALUE_EXPR (new_vard,
+                                              build_fold_addr_expr (lvar));
+                         DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
+                       }
+
                      if (nx)
                        x = lang_hooks.decls.omp_clause_default_ctor
                                                (c, unshare_expr (ivar), x);
@@ -4844,6 +4867,24 @@ lower_rec_input_clauses (tree clauses, g
                        }
                      break;
                    }
+                 if (omp_is_reference (var))
+                   {
+                     gcc_assert (TREE_CODE (new_var) == MEM_REF);
+                     tree new_vard = TREE_OPERAND (new_var, 0);
+                     gcc_assert (DECL_P (new_vard));
+                     tree type = TREE_TYPE (TREE_TYPE (new_vard));
+                     x = TYPE_SIZE_UNIT (type);
+                     if (TREE_CONSTANT (x))
+                       {
+                         x = create_tmp_var_raw (type, get_name (var));
+                         gimple_add_tmp_var (x);
+                         TREE_ADDRESSABLE (x) = 1;
+                         x = build_fold_addr_expr_loc (clause_loc, x);
+                         x = fold_convert_loc (clause_loc,
+                                               TREE_TYPE (new_vard), x);
+                         gimplify_assign (new_vard, x, ilist);
+                       }
+                   }
                }
              if (nx)
                gimplify_and_add (nx, ilist);
@@ -4931,6 +4972,28 @@ lower_rec_input_clauses (tree clauses, g
 
                      if (OMP_CLAUSE_LINEAR_ARRAY (c))
                        {
+                         if (omp_is_reference (var))
+                           {
+                             gcc_assert (TREE_CODE (new_var) == MEM_REF);
+                             tree new_vard = TREE_OPERAND (new_var, 0);
+                             gcc_assert (DECL_P (new_vard));
+                             tree type = TREE_TYPE (TREE_TYPE (new_vard));
+                             nx = TYPE_SIZE_UNIT (type);
+                             if (TREE_CONSTANT (nx))
+                               {
+                                 nx = create_tmp_var_raw (type,
+                                                          get_name (var));
+                                 gimple_add_tmp_var (nx);
+                                 TREE_ADDRESSABLE (nx) = 1;
+                                 nx = build_fold_addr_expr_loc (clause_loc,
+                                                                nx);
+                                 nx = fold_convert_loc (clause_loc,
+                                                        TREE_TYPE (new_vard),
+                                                        nx);
+                                 gimplify_assign (new_vard, nx, ilist);
+                               }
+                           }
+
                          x = lang_hooks.decls.omp_clause_linear_ctor
                                                        (c, new_var, x, t);
                          gimplify_and_add (x, ilist);
@@ -4945,10 +5008,20 @@ lower_rec_input_clauses (tree clauses, g
                    }
 
                  if ((OMP_CLAUSE_CODE (c) != OMP_CLAUSE_LINEAR
-                      || TREE_ADDRESSABLE (new_var))
+                      || TREE_ADDRESSABLE (new_var)
+                      || omp_is_reference (var))
                      && lower_rec_simd_input_clauses (new_var, ctx, &sctx,
                                                       ivar, lvar))
                    {
+                     if (omp_is_reference (var))
+                       {
+                         gcc_assert (TREE_CODE (new_var) == MEM_REF);
+                         tree new_vard = TREE_OPERAND (new_var, 0);
+                         gcc_assert (DECL_P (new_vard));
+                         SET_DECL_VALUE_EXPR (new_vard,
+                                              build_fold_addr_expr (lvar));
+                         DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
+                       }
                      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR)
                        {
                          tree iv = create_tmp_var (TREE_TYPE (new_var));
@@ -4983,6 +5056,24 @@ lower_rec_input_clauses (tree clauses, g
                        }
                      break;
                    }
+                 if (omp_is_reference (var))
+                   {
+                     gcc_assert (TREE_CODE (new_var) == MEM_REF);
+                     tree new_vard = TREE_OPERAND (new_var, 0);
+                     gcc_assert (DECL_P (new_vard));
+                     tree type = TREE_TYPE (TREE_TYPE (new_vard));
+                     nx = TYPE_SIZE_UNIT (type);
+                     if (TREE_CONSTANT (nx))
+                       {
+                         nx = create_tmp_var_raw (type, get_name (var));
+                         gimple_add_tmp_var (nx);
+                         TREE_ADDRESSABLE (nx) = 1;
+                         nx = build_fold_addr_expr_loc (clause_loc, nx);
+                         nx = fold_convert_loc (clause_loc,
+                                                TREE_TYPE (new_vard), nx);
+                         gimplify_assign (new_vard, nx, ilist);
+                       }
+                   }
                }
              x = lang_hooks.decls.omp_clause_copy_ctor
                                                (c, unshare_expr (new_var), x);
--- gcc/testsuite/g++.dg/vect/simd-1.cc.jj      2019-06-04 15:17:42.046292355 +0200
+++ gcc/testsuite/g++.dg/vect/simd-1.cc 2019-06-04 15:20:29.397722531 +0200
@@ -0,0 +1,114 @@
+// { dg-require-effective-target vect_simd_clones }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+int w;
+struct S {
+  int s, &t;
+  int *p;
+  S (int *x) : s (0), t (w), p (x) {};
+  void foo (short &, int &);
+  void bar (short &, int &);
+  void baz (short &, int &);
+  void qux (short &, int &);
+};
+
+__attribute__((noipa)) void
+S::foo (short &x, int &y)
+{
+  int *q = this->p;
+  #pragma omp simd lastprivate (x, s, t) private (y)
+  for (int i = 0; i < 1025; ++i)
+    {
+      y = q[i];
+      x = y;
+      q[i] = y * 2;
+      s = q[i] + 3;
+      t = q[i] + 6;
+    }
+}
+
+__attribute__((noipa)) void
+S::bar (short &x, int &y)
+{
+  #pragma omp simd linear (x) linear (s, t: 2) private (y)
+  for (int i = 0; i < 1025; ++i)
+    {
+      y = p[i];
+      x += y - 2 * i + 1;
+      p[i] = y * 2;
+      s += 2 * y - 4 * i + 2;
+      t += 2 * y - 4 * i + 2;
+    }
+}
+
+__attribute__((noipa)) void
+S::baz (short &x, int &y)
+{
+  int *q = this->p;
+  #pragma omp simd lastprivate (x, s, t) private (y) if (simd: 0)
+  for (int i = 0; i < 1025; ++i)
+    {
+      y = q[i];
+      x = y;
+      q[i] = y * 2;
+      s = q[i] + 3;
+      t = q[i] + 6;
+    }
+}
+
+__attribute__((noipa)) void
+S::qux (short &x, int &y)
+{
+  #pragma omp simd linear (x) linear (s, t: 2) private (y) simdlen (1)
+  for (int i = 0; i < 1025; ++i)
+    {
+      y = p[i];
+      x += y - 2 * i + 1;
+      p[i] = y * 2;
+      s += 2 * y - 4 * i + 2;
+      t += 2 * y - 4 * i + 2;
+    }
+}
+
+int
+main ()
+{
+  short x;
+  int a[1025], y;
+  check_vect ();
+  S s = a;
+  for (int i = 0; i < 1025; ++i)
+    {
+      a[i] = i;
+      asm volatile ("" : "+g" (i));
+    }
+  s.foo (x, y);
+  if (x != 1024 || s.s != 2051 || s.t != 2054)
+    abort ();
+  for (int i = 0; i < 1025; ++i)
+    if (a[i] != 2 * i)
+      abort ();
+  s.bar (x, y);
+  if (x != 2049 || s.s != 4101 || s.t != 4104)
+    abort ();
+  for (int i = 0; i < 1025; ++i)
+    if (a[i] != 4 * i)
+      abort ();
+    else
+      a[i] = i;
+  s.baz (x, y);
+  if (x != 1024 || s.s != 2051 || s.t != 2054)
+    abort ();
+  for (int i = 0; i < 1025; ++i)
+    if (a[i] != 2 * i)
+      abort ();
+  s.qux (x, y);
+  if (x != 2049 || s.s != 4101 || s.t != 4104)
+    abort ();
+  for (int i = 0; i < 1025; ++i)
+    if (a[i] != 4 * i)
+      abort ();
+}

        Jakub

Reply via email to