OMP safelen handling is assigning the backend-provided maximum as an int even when the pragma didn’t provide one. As a result, the vectoriser is rejecting SVE modes when comparing a poly_int with the safelen.
That is, for the attached test case, omp_max_vf gets [16, 16] from the backend. This then becomes 16 as the OMP safelen is an integer. When the vectoriser compares the potential vector mode with maybe_lt (max_vf, min_vf), this would fail, resulting in no SVE vector mode being selected. One suggestion there was to set safelen to INT_MAX when the OMP pragma does not provide safelen explicitly. Bootstrapped and regression tested on aarch64-linux-gnu. Is this OK for trunk? Thanks, Kugan PR middle-end/114635 PR 114635 gcc/ChangeLog: * omp-low.cc (lower_rec_input_clauses): Set INT_MAX when safelen is not provided instead of using backend provided safelen. gcc/testsuite/ChangeLog: * c-c++-common/pr114635-1.cpp: New test. * c-c++-common/pr114635-2.cpp: New test. Signed-off-by: Kugan Vivekanandarajah <kvivekana...@nvidia.com>
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index 4d003f42098..69feedbde54 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -6980,6 +6980,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, || (poly_int_tree_p (OMP_CLAUSE_SAFELEN_EXPR (c), &safe_len) && maybe_gt (safe_len, sctx.max_vf))) { + if (!sctx.is_simt && maybe_ne (sctx.max_vf, 1U)) + sctx.max_vf = INT_MAX; c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN); OMP_CLAUSE_SAFELEN_EXPR (c) = build_int_cst (integer_type_node, sctx.max_vf); diff --git a/gcc/testsuite/c-c++-common/pr114635-1.cpp b/gcc/testsuite/c-c++-common/pr114635-1.cpp new file mode 100644 index 00000000000..9bf52ba85b0 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr114635-1.cpp @@ -0,0 +1,60 @@ + +/* PR middle-end/114635 */ +/* { dg-do compile } */ +/* { dg-options "-fopenmp -O3 -fdump-tree-omplower" } */ +namespace std { + inline constexpr float + sqrt(float __x) + { return __builtin_sqrtf(__x); } +} +extern const float PolyCoefficients4[] = { + 0.263729f, -0.0686285f, 0.00882248f, -0.000592487f, 0.0000164622f +}; + +template <int PolyOrder, const float (&PolyCoefficients)[PolyOrder+1]> +static void GravityForceKernel(int n, float *__restrict__ x, float *__restrict__ y, + float *__restrict__ z, float *__restrict__ mass, + float x0, float y0, float z0, + float MaxSepSqrd, float SofteningLenSqrd, + float &__restrict__ ax, float &__restrict__ ay, + float &__restrict__ az) { + float lax = 0.0f, lay = 0.0f, laz = 0.0f; + +#pragma omp simd reduction(+:lax,lay,laz) + + for (int i = 0; i < n; ++i) { + float dx = x[i] - x0, dy = y[i] - y0, dz = z[i] - z0; + float r2 = dx * dx + dy * dy + dz * dz; + + if (r2 >= MaxSepSqrd || r2 == 0.0f) + continue; + + float r2s = r2 + SofteningLenSqrd; + float f = PolyCoefficients[PolyOrder]; + for (int p = 1; p <= PolyOrder; ++p) + f = PolyCoefficients[PolyOrder-p] + r2*f; + + f = (1.0f / (r2s * std::sqrt(r2s)) - f) * mass[i]; + + lax += f * dx; + lay += f * dy; + laz += f 
* dz; + } + + ax += lax; + ay += lay; + az += laz; +} + +void GravityForceKernel4(int n, float *__restrict__ x, float *__restrict__ y, + float *__restrict__ z, float *__restrict__ mass, + float x0, float y0, float z0, + float MaxSepSqrd, float SofteningLenSqrd, + float &__restrict__ ax, float &__restrict__ ay, + float &__restrict__ az) { + GravityForceKernel<4, PolyCoefficients4>(n, x, y, z, mass, x0, y0, z0, + MaxSepSqrd, SofteningLenSqrd, + ax, ay, az); +} + +/* { dg-final { scan-tree-dump "safelen(2147483647)" "omplower" } } */ diff --git a/gcc/testsuite/c-c++-common/pr114635-2.cpp b/gcc/testsuite/c-c++-common/pr114635-2.cpp new file mode 100644 index 00000000000..7de2c8eea73 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr114635-2.cpp @@ -0,0 +1,61 @@ + +/* PR middle-end/114635 */ +/* { dg-do compile } */ +/* { dg-options "-fopenmp -O3 -fdump-tree-omplower" } */ + +namespace std { + inline constexpr float + sqrt(float __x) + { return __builtin_sqrtf(__x); } +} +extern const float PolyCoefficients4[] = { + 0.263729f, -0.0686285f, 0.00882248f, -0.000592487f, 0.0000164622f +}; + +template <int PolyOrder, const float (&PolyCoefficients)[PolyOrder+1]> +static void GravityForceKernel(int n, float *__restrict__ x, float *__restrict__ y, + float *__restrict__ z, float *__restrict__ mass, + float x0, float y0, float z0, + float MaxSepSqrd, float SofteningLenSqrd, + float &__restrict__ ax, float &__restrict__ ay, + float &__restrict__ az) { + float lax = 0.0f, lay = 0.0f, laz = 0.0f; + +#pragma omp simd reduction(+:lax,lay,laz) safelen(8) + + for (int i = 0; i < n; ++i) { + float dx = x[i] - x0, dy = y[i] - y0, dz = z[i] - z0; + float r2 = dx * dx + dy * dy + dz * dz; + + if (r2 >= MaxSepSqrd || r2 == 0.0f) + continue; + + float r2s = r2 + SofteningLenSqrd; + float f = PolyCoefficients[PolyOrder]; + for (int p = 1; p <= PolyOrder; ++p) + f = PolyCoefficients[PolyOrder-p] + r2*f; + + f = (1.0f / (r2s * std::sqrt(r2s)) - f) * mass[i]; + + lax += f * dx; + lay += f * dy; + 
laz += f * dz; + } + + ax += lax; + ay += lay; + az += laz; +} + +void GravityForceKernel4(int n, float *__restrict__ x, float *__restrict__ y, + float *__restrict__ z, float *__restrict__ mass, + float x0, float y0, float z0, + float MaxSepSqrd, float SofteningLenSqrd, + float &__restrict__ ax, float &__restrict__ ay, + float &__restrict__ az) { + GravityForceKernel<4, PolyCoefficients4>(n, x, y, z, mass, x0, y0, z0, + MaxSepSqrd, SofteningLenSqrd, + ax, ay, az); +} + +/* { dg-final { scan-tree-dump "safelen(8)" "omplower" } } */