OMP safelen handling assigns the backend-provided maximum as an int even when
the pragma does not provide one. As a result, the vectoriser rejects SVE modes
when comparing the poly_int with the safelen.
That is, for the attached test case, omp_max_vf gets [16, 16] from the
backend. This then becomes 16, as the OMP safelen is an integer. When the
vectoriser compares the potential vector mode with maybe_lt (max_vf, min_vf),
this would fail, resulting in no SVE vector mode being selected.
One suggestion there was to set safelen to INT_MAX when the OMP pragma does not
provide safelen explicitly.
Bootstrapped and regression tested on aarch64-linux-gnu. Is this OK for trunk?
Thanks,
Kugan
PR middle-end/114635
PR 114635
gcc/ChangeLog:
* omp-low.cc (lower_rec_input_clauses): Set INT_MAX
when safelen is not provided instead of using backend
provided safelen.
gcc/testsuite/ChangeLog:
* c-c++-common/pr114635-1.cpp: New test.
* c-c++-common/pr114635-2.cpp: New test.
Signed-off-by: Kugan Vivekanandarajah <[email protected]>
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 4d003f42098..69feedbde54 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -6980,6 +6980,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
|| (poly_int_tree_p (OMP_CLAUSE_SAFELEN_EXPR (c), &safe_len)
&& maybe_gt (safe_len, sctx.max_vf)))
{
+ if (!sctx.is_simt && maybe_ne (sctx.max_vf, 1U))
+ sctx.max_vf = INT_MAX;
c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
OMP_CLAUSE_SAFELEN_EXPR (c) = build_int_cst (integer_type_node,
sctx.max_vf);
diff --git a/gcc/testsuite/c-c++-common/pr114635-1.cpp b/gcc/testsuite/c-c++-common/pr114635-1.cpp
new file mode 100644
index 00000000000..9bf52ba85b0
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr114635-1.cpp
@@ -0,0 +1,60 @@
+
+/* PR middle-end/114635 */
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O3 -fdump-tree-omplower" } */
+namespace std {
+ inline constexpr float
+ sqrt(float __x)
+ { return __builtin_sqrtf(__x); }
+}
+extern const float PolyCoefficients4[] = {
+ 0.263729f, -0.0686285f, 0.00882248f, -0.000592487f, 0.0000164622f
+};
+
+template <int PolyOrder, const float (&PolyCoefficients)[PolyOrder+1]>
+static void GravityForceKernel(int n, float *__restrict__ x, float *__restrict__ y,
+ float *__restrict__ z, float *__restrict__ mass,
+ float x0, float y0, float z0,
+ float MaxSepSqrd, float SofteningLenSqrd,
+ float &__restrict__ ax, float &__restrict__ ay,
+ float &__restrict__ az) {
+ float lax = 0.0f, lay = 0.0f, laz = 0.0f;
+
+#pragma omp simd reduction(+:lax,lay,laz)
+
+ for (int i = 0; i < n; ++i) {
+ float dx = x[i] - x0, dy = y[i] - y0, dz = z[i] - z0;
+ float r2 = dx * dx + dy * dy + dz * dz;
+
+ if (r2 >= MaxSepSqrd || r2 == 0.0f)
+ continue;
+
+ float r2s = r2 + SofteningLenSqrd;
+ float f = PolyCoefficients[PolyOrder];
+ for (int p = 1; p <= PolyOrder; ++p)
+ f = PolyCoefficients[PolyOrder-p] + r2*f;
+
+ f = (1.0f / (r2s * std::sqrt(r2s)) - f) * mass[i];
+
+ lax += f * dx;
+ lay += f * dy;
+ laz += f * dz;
+ }
+
+ ax += lax;
+ ay += lay;
+ az += laz;
+}
+
+void GravityForceKernel4(int n, float *__restrict__ x, float *__restrict__ y,
+ float *__restrict__ z, float *__restrict__ mass,
+ float x0, float y0, float z0,
+ float MaxSepSqrd, float SofteningLenSqrd,
+ float &__restrict__ ax, float &__restrict__ ay,
+ float &__restrict__ az) {
+ GravityForceKernel<4, PolyCoefficients4>(n, x, y, z, mass, x0, y0, z0,
+ MaxSepSqrd, SofteningLenSqrd,
+ ax, ay, az);
+}
+
+/* { dg-final { scan-tree-dump "safelen\\(2147483647\\)" "omplower" } } */
diff --git a/gcc/testsuite/c-c++-common/pr114635-2.cpp b/gcc/testsuite/c-c++-common/pr114635-2.cpp
new file mode 100644
index 00000000000..7de2c8eea73
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr114635-2.cpp
@@ -0,0 +1,61 @@
+
+/* PR middle-end/114635 */
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O3 -fdump-tree-omplower" } */
+
+namespace std {
+ inline constexpr float
+ sqrt(float __x)
+ { return __builtin_sqrtf(__x); }
+}
+extern const float PolyCoefficients4[] = {
+ 0.263729f, -0.0686285f, 0.00882248f, -0.000592487f, 0.0000164622f
+};
+
+template <int PolyOrder, const float (&PolyCoefficients)[PolyOrder+1]>
+static void GravityForceKernel(int n, float *__restrict__ x, float *__restrict__ y,
+ float *__restrict__ z, float *__restrict__ mass,
+ float x0, float y0, float z0,
+ float MaxSepSqrd, float SofteningLenSqrd,
+ float &__restrict__ ax, float &__restrict__ ay,
+ float &__restrict__ az) {
+ float lax = 0.0f, lay = 0.0f, laz = 0.0f;
+
+#pragma omp simd reduction(+:lax,lay,laz) safelen(8)
+
+ for (int i = 0; i < n; ++i) {
+ float dx = x[i] - x0, dy = y[i] - y0, dz = z[i] - z0;
+ float r2 = dx * dx + dy * dy + dz * dz;
+
+ if (r2 >= MaxSepSqrd || r2 == 0.0f)
+ continue;
+
+ float r2s = r2 + SofteningLenSqrd;
+ float f = PolyCoefficients[PolyOrder];
+ for (int p = 1; p <= PolyOrder; ++p)
+ f = PolyCoefficients[PolyOrder-p] + r2*f;
+
+ f = (1.0f / (r2s * std::sqrt(r2s)) - f) * mass[i];
+
+ lax += f * dx;
+ lay += f * dy;
+ laz += f * dz;
+ }
+
+ ax += lax;
+ ay += lay;
+ az += laz;
+}
+
+void GravityForceKernel4(int n, float *__restrict__ x, float *__restrict__ y,
+ float *__restrict__ z, float *__restrict__ mass,
+ float x0, float y0, float z0,
+ float MaxSepSqrd, float SofteningLenSqrd,
+ float &__restrict__ ax, float &__restrict__ ay,
+ float &__restrict__ az) {
+ GravityForceKernel<4, PolyCoefficients4>(n, x, y, z, mass, x0, y0, z0,
+ MaxSepSqrd, SofteningLenSqrd,
+ ax, ay, az);
+}
+
+/* { dg-final { scan-tree-dump "safelen\\(8\\)" "omplower" } } */