https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122545
Bug ID: 122545
Summary: attribute optimize("no-signed-zeros",
"finite-math-only") prevents inlining or function
specialization
Product: gcc
Version: 16.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: middle-end
Assignee: unassigned at gcc dot gnu.org
Reporter: manu at gcc dot gnu.org
Target Milestone: ---
https://godbolt.org/z/PKoGvYWMz
Testcase will be in the next comment.
1. gcc -O3 -march=x86-64-v3 -fopt-info-vec-optimized-missed
-D_attr_finite_math= -D_attr_finite_math_helper=
Nothing is vectorized. epsilon_helper_() is completely inlined.
2. gcc -O3 -march=x86-64-v3 -fopt-info-vec-optimized-missed -U_attr_finite_math
-U_attr_finite_math_helper
The inner loop is vectorized. epsilon_helper_() is completely inlined.
3. gcc -O3 -march=x86-64-v3 -fopt-info-vec-optimized-missed
-D_attr_finite_math= -U_attr_finite_math_helper
The inner loop is vectorized. epsilon_helper_() is NOT completely inlined.
Vectorization requires finite math, which is fair enough. However, unless the
callers are annotated with the same attribute as well, the attribute on the
helper prevents the helper from being inlined. This is similar to bug 96505.
It is understandable that not every option that can be put in "optimize" is
compatible with inlining. However, it should be possible to assert that, within
a given range of code, the compiler can assume finite-math-only and
no-signed-zeros. In particular, it should be possible to create vectorized
specializations for all 6 variants, with the helper inlined into each of them.
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
/* Type used for the number of coordinates per point and for coordinate
   indices. */
typedef uint_fast8_t dimension_t;

/* Selects how epsilon_helper_() forms its per-coordinate term: AGREE_NONE
   scales each term by minmax[d]; AGREE_MINIMISE and AGREE_MAXIMISE swap the
   roles of the two points. */
enum objs_agree_t { AGREE_MINIMISE = -1, AGREE_NONE = 0, AGREE_MAXIMISE = 1 };

/* Aliases for fmax()/fmin() from <math.h>. */
#define MAX(a,b) fmax(a,b)
#define MIN(a,b) fmin(a,b)

/* Attribute applied to the public entry points.  It can be predefined on the
   command line (e.g. -D_attr_finite_math=) to drop the attribute.  The
   definition must stay on one logical line, hence the backslash
   continuation. */
#ifndef _attr_finite_math
#define _attr_finite_math \
    __attribute__((optimize("no-signed-zeros", "finite-math-only")))
#endif

/* Same attribute, applied to the shared helper; independently overridable
   (e.g. -D_attr_finite_math_helper=). */
#ifndef _attr_finite_math_helper
#define _attr_finite_math_helper \
    __attribute__((optimize("no-signed-zeros", "finite-math-only")))
#endif

/* (a) / (b) when do_ratio is true, (a) - (b) otherwise.  Only the selected
   arm of the conditional is evaluated. */
#define eps_value_(do_ratio, a, b) ((do_ratio) ? (a) / (b) : (a) - (b))
/*
 * Shared implementation behind the six epsilon_{mult,addi}_agree_* entry
 * points in this file.
 *
 * points_a and points_b are flat arrays addressed as points_x[i * dim + d]:
 * size_a (resp. size_b) consecutive points of dim doubles each.
 *
 * For a pair of points (a, b) and a coordinate d the per-coordinate term is
 *   agree == AGREE_NONE:      minmax[d] * eps_value_(do_mult, b[d], a[d])
 *   agree == AGREE_MINIMISE:  eps_value_(do_mult, a[d], b[d])
 *   any other agree value:    eps_value_(do_mult, b[d], a[d])
 * where eps_value_(do_mult, x, y) is x / y when do_mult is true and x - y
 * otherwise.
 *
 * Assuming no NaN values arise, the result is the maximum over all b of the
 * minimum over all a of the maximum over all d of that term, and never less
 * than the starting value (0 when do_mult is true, -INFINITY otherwise).
 *
 * minmax is only dereferenced when agree == AGREE_NONE, so it may be NULL for
 * the other modes.  Coordinates 0 and 1 are read unconditionally for every
 * (a, b) pair, so dim must be at least 2 whenever both sets are non-empty.
 *
 * NOTE(review): +/-INFINITY are used as sentinels although the function is
 * built with "finite-math-only" unless _attr_finite_math_helper is
 * overridden -- confirm the sentinels are safe under that option.
 */
_attr_finite_math_helper
static inline double
epsilon_helper_(bool do_mult, const enum objs_agree_t agree,
const signed char * restrict minmax, dimension_t dim,
const double * restrict points_a, size_t size_a,
const double * restrict points_b, size_t size_b)
{
/* Running maximum over the points of points_b processed so far. */
double epsilon = do_mult ? 0 : -INFINITY;
for (size_t b = 0; b < size_b; b++) {
/* Set when some a proves that this b cannot raise epsilon. */
bool skip_max = false;
/* Running minimum over the points of points_a for the current b. */
double epsilon_min = INFINITY;
const double * restrict pb = &points_b[b * dim];
for (size_t a = 0; a < size_a; a++) {
const double * restrict pa = &points_a[a * dim];
/* Start from the maximum of the terms for coordinates 0 and 1 only. */
double epsilon_max = (agree == AGREE_NONE)
? MAX(minmax[0] * eps_value_(do_mult, pb[0], pa[0]),
minmax[1] * eps_value_(do_mult, pb[1], pa[1]))
: ((agree == AGREE_MINIMISE)
? MAX(eps_value_(do_mult, pa[0], pb[0]), eps_value_(do_mult,
pa[1], pb[1]))
: MAX(eps_value_(do_mult, pb[0], pa[0]), eps_value_(do_mult,
pb[1], pa[1])));
/* The remaining coordinates can only increase epsilon_max, so this a
   cannot lower epsilon_min: skip it without looking at them. */
if (epsilon_max >= epsilon_min)
continue;
/* Fold in the terms for the remaining coordinates 2 .. dim-1. */
for (dimension_t d = 2; d < dim; d++) {
double epsilon_temp = (agree == AGREE_NONE)
? minmax[d] * eps_value_(do_mult, pb[d], pa[d])
: ((agree == AGREE_MINIMISE)
? eps_value_(do_mult, pa[d], pb[d])
: eps_value_(do_mult, pb[d], pa[d]));
epsilon_max = MAX(epsilon_max, epsilon_temp);
}
/* The minimum for this b is already no larger than the current
   epsilon, so this b cannot change the result: abandon it. */
if (epsilon_max <= epsilon) {
skip_max = true;
break;
}
epsilon_min = MIN(epsilon_min, epsilon_max);
}
if (skip_max) continue;
/* If size_a is 0, epsilon_min is still the INFINITY sentinel here. */
epsilon = MAX(epsilon, epsilon_min);
}
return epsilon;
}
/*
 * Ratio-based variant (do_mult = true) for AGREE_NONE.
 *
 * Forwards every argument, including minmax, unchanged to epsilon_helper_()
 * and returns the helper's result.
 */
_attr_finite_math
double
epsilon_mult_agree_none(const signed char * restrict minmax, dimension_t dim,
                        const double * restrict points_a, size_t size_a,
                        const double * restrict points_b, size_t size_b)
{
    const bool use_ratio = true;
    const enum objs_agree_t mode = AGREE_NONE;

    return epsilon_helper_(use_ratio, mode, minmax, dim,
                           points_a, size_a, points_b, size_b);
}
/*
 * Ratio-based variant (do_mult = true) for AGREE_MINIMISE.
 *
 * No minmax array is supplied: NULL is passed in its place.  All other
 * arguments are forwarded unchanged to epsilon_helper_().
 */
_attr_finite_math
double
epsilon_mult_agree_min(dimension_t dim,
                       const double * restrict points_a, size_t size_a,
                       const double * restrict points_b, size_t size_b)
{
    const bool use_ratio = true;
    const enum objs_agree_t mode = AGREE_MINIMISE;

    return epsilon_helper_(use_ratio, mode, /*minmax=*/NULL, dim,
                           points_a, size_a, points_b, size_b);
}
/*
 * Ratio-based variant (do_mult = true) for AGREE_MAXIMISE.
 *
 * No minmax array is supplied: NULL is passed in its place.  All other
 * arguments are forwarded unchanged to epsilon_helper_().
 */
_attr_finite_math
double
epsilon_mult_agree_max(dimension_t dim,
                       const double * restrict points_a, size_t size_a,
                       const double * restrict points_b, size_t size_b)
{
    const bool use_ratio = true;
    const enum objs_agree_t mode = AGREE_MAXIMISE;

    return epsilon_helper_(use_ratio, mode, /*minmax=*/NULL, dim,
                           points_a, size_a, points_b, size_b);
}
/*
 * Difference-based variant (do_mult = false) for AGREE_NONE.
 *
 * Forwards every argument, including minmax, unchanged to epsilon_helper_()
 * and returns the helper's result.
 */
_attr_finite_math
double
epsilon_addi_agree_none(const signed char * restrict minmax, dimension_t dim,
                        const double * restrict points_a, size_t size_a,
                        const double * restrict points_b, size_t size_b)
{
    const bool use_ratio = false;
    const enum objs_agree_t mode = AGREE_NONE;

    return epsilon_helper_(use_ratio, mode, minmax, dim,
                           points_a, size_a, points_b, size_b);
}
/*
 * Difference-based variant (do_mult = false) for AGREE_MINIMISE.
 *
 * No minmax array is supplied: NULL is passed in its place.  All other
 * arguments are forwarded unchanged to epsilon_helper_().
 */
_attr_finite_math
double
epsilon_addi_agree_min(dimension_t dim,
                       const double * restrict points_a, size_t size_a,
                       const double * restrict points_b, size_t size_b)
{
    const bool use_ratio = false;
    const enum objs_agree_t mode = AGREE_MINIMISE;

    return epsilon_helper_(use_ratio, mode, /*minmax=*/NULL, dim,
                           points_a, size_a, points_b, size_b);
}
/*
 * Difference-based variant (do_mult = false) for AGREE_MAXIMISE.
 *
 * No minmax array is supplied: NULL is passed in its place.  All other
 * arguments are forwarded unchanged to epsilon_helper_().
 */
_attr_finite_math
double
epsilon_addi_agree_max(dimension_t dim,
                       const double * restrict points_a, size_t size_a,
                       const double * restrict points_b, size_t size_b)
{
    const bool use_ratio = false;
    const enum objs_agree_t mode = AGREE_MAXIMISE;

    return epsilon_helper_(use_ratio, mode, /*minmax=*/NULL, dim,
                           points_a, size_a, points_b, size_b);
}