OMP safelen handling assigns the backend-provided maximum as an int even when
the pragma does not provide a safelen. As a result, the vectoriser rejects SVE
modes when comparing a poly_int with the safelen.

That is, for the attached test case, omp_max_vf gets [16, 16] from the
backend. This then becomes 16, as the OMP safelen is an integer. When the
vectoriser checks the potential vector mode with maybe_lt (max_vf, min_vf),
the mode is rejected, resulting in no SVE vector mode being selected.

One suggestion there was to set safelen to INT_MAX when the OMP pragma does not
provide safelen explicitly.

Bootstrapped and regression tested on aarch64-linux-gnu. Is this OK for trunk?

Thanks,
Kugan



        PR middle-end/114635
        PR 114635

gcc/ChangeLog:

        * omp-low.cc (lower_rec_input_clauses): Set INT_MAX
        when safelen is not provided instead of using backend
        provided safelen.

gcc/testsuite/ChangeLog:

        * c-c++-common/pr114635-1.cpp: New test.
        * c-c++-common/pr114635-2.cpp: New test.

Signed-off-by: Kugan Vivekanandarajah <kvivekana...@nvidia.com>

diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 4d003f42098..69feedbde54 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -6980,6 +6980,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, 
gimple_seq *dlist,
          || (poly_int_tree_p (OMP_CLAUSE_SAFELEN_EXPR (c), &safe_len)
              && maybe_gt (safe_len, sctx.max_vf)))
        {
+         if (!sctx.is_simt && maybe_ne (sctx.max_vf, 1U))
+           sctx.max_vf = INT_MAX;
          c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
          OMP_CLAUSE_SAFELEN_EXPR (c) = build_int_cst (integer_type_node,
                                                       sctx.max_vf);
diff --git a/gcc/testsuite/c-c++-common/pr114635-1.cpp 
b/gcc/testsuite/c-c++-common/pr114635-1.cpp
new file mode 100644
index 00000000000..9bf52ba85b0
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr114635-1.cpp
@@ -0,0 +1,60 @@
+
+/* PR middle-end/114635 */
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O3 -fdump-tree-omplower" } */
+namespace std {
+  inline constexpr float
+  sqrt(float __x)
+  { return __builtin_sqrtf(__x); }
+}
+extern const float PolyCoefficients4[] = {
+  0.263729f, -0.0686285f, 0.00882248f, -0.000592487f, 0.0000164622f
+};
+
+template <int PolyOrder, const float (&PolyCoefficients)[PolyOrder+1]>
+static void GravityForceKernel(int n, float *__restrict__ x, float 
*__restrict__ y,
+                               float *__restrict__ z, float *__restrict__ mass,
+                               float x0, float y0, float z0,
+                               float MaxSepSqrd, float SofteningLenSqrd,
+                               float &__restrict__ ax, float &__restrict__ ay,
+                               float &__restrict__ az) {
+  float lax = 0.0f, lay = 0.0f, laz = 0.0f;
+
+#pragma omp simd reduction(+:lax,lay,laz)
+
+  for (int i = 0; i < n; ++i) {
+    float dx = x[i] - x0, dy = y[i] - y0, dz = z[i] - z0;
+    float r2 = dx * dx + dy * dy + dz * dz;
+
+    if (r2 >= MaxSepSqrd || r2 == 0.0f)
+      continue;
+
+    float r2s = r2 + SofteningLenSqrd;
+    float f = PolyCoefficients[PolyOrder];
+    for (int p = 1; p <= PolyOrder; ++p)
+      f = PolyCoefficients[PolyOrder-p] + r2*f;
+
+    f = (1.0f / (r2s * std::sqrt(r2s)) - f) * mass[i];
+
+    lax += f * dx;
+    lay += f * dy;
+    laz += f * dz;
+  }
+
+  ax += lax;
+  ay += lay;
+  az += laz;
+}
+
+void GravityForceKernel4(int n, float *__restrict__ x, float *__restrict__ y,
+                         float *__restrict__ z, float *__restrict__ mass,
+                         float x0, float y0, float z0,
+                         float MaxSepSqrd, float SofteningLenSqrd,
+                         float &__restrict__ ax, float &__restrict__ ay,
+                         float &__restrict__ az) {
+  GravityForceKernel<4, PolyCoefficients4>(n, x, y, z, mass, x0, y0, z0,
+                                           MaxSepSqrd, SofteningLenSqrd,
+                                           ax, ay, az);
+}
+
+/* { dg-final { scan-tree-dump "safelen\\(2147483647\\)" "omplower" } } */
diff --git a/gcc/testsuite/c-c++-common/pr114635-2.cpp 
b/gcc/testsuite/c-c++-common/pr114635-2.cpp
new file mode 100644
index 00000000000..7de2c8eea73
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr114635-2.cpp
@@ -0,0 +1,61 @@
+
+/* PR middle-end/114635 */
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O3 -fdump-tree-omplower" } */
+
+namespace std {
+  inline constexpr float
+  sqrt(float __x)
+  { return __builtin_sqrtf(__x); }
+}
+extern const float PolyCoefficients4[] = {
+  0.263729f, -0.0686285f, 0.00882248f, -0.000592487f, 0.0000164622f
+};
+
+template <int PolyOrder, const float (&PolyCoefficients)[PolyOrder+1]>
+static void GravityForceKernel(int n, float *__restrict__ x, float 
*__restrict__ y,
+                               float *__restrict__ z, float *__restrict__ mass,
+                               float x0, float y0, float z0,
+                               float MaxSepSqrd, float SofteningLenSqrd,
+                               float &__restrict__ ax, float &__restrict__ ay,
+                               float &__restrict__ az) {
+  float lax = 0.0f, lay = 0.0f, laz = 0.0f;
+
+#pragma omp simd reduction(+:lax,lay,laz) safelen(8)
+
+  for (int i = 0; i < n; ++i) {
+    float dx = x[i] - x0, dy = y[i] - y0, dz = z[i] - z0;
+    float r2 = dx * dx + dy * dy + dz * dz;
+
+    if (r2 >= MaxSepSqrd || r2 == 0.0f)
+      continue;
+
+    float r2s = r2 + SofteningLenSqrd;
+    float f = PolyCoefficients[PolyOrder];
+    for (int p = 1; p <= PolyOrder; ++p)
+      f = PolyCoefficients[PolyOrder-p] + r2*f;
+
+    f = (1.0f / (r2s * std::sqrt(r2s)) - f) * mass[i];
+
+    lax += f * dx;
+    lay += f * dy;
+    laz += f * dz;
+  }
+
+  ax += lax;
+  ay += lay;
+  az += laz;
+}
+
+void GravityForceKernel4(int n, float *__restrict__ x, float *__restrict__ y,
+                         float *__restrict__ z, float *__restrict__ mass,
+                         float x0, float y0, float z0,
+                         float MaxSepSqrd, float SofteningLenSqrd,
+                         float &__restrict__ ax, float &__restrict__ ay,
+                         float &__restrict__ az) {
+  GravityForceKernel<4, PolyCoefficients4>(n, x, y, z, mass, x0, y0, z0,
+                                           MaxSepSqrd, SofteningLenSqrd,
+                                           ax, ay, az);
+}
+
+/* { dg-final { scan-tree-dump "safelen\\(8\\)" "omplower" } } */

Reply via email to