https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122545

            Bug ID: 122545
           Summary: attribute optimize("no-signed-zeros",
                    "finite-math-only") prevents inlining or function
                    specialization
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: manu at gcc dot gnu.org
  Target Milestone: ---

https://godbolt.org/z/PKoGvYWMz

Testcase will be in the next comment.

1. gcc -O3 -march=x86-64-v3 -fopt-info-vec-optimized-missed
-D_attr_finite_math= -D_attr_finite_math_helper=

Nothing is vectorized. epsilon_helper_() is completely inlined.

2. gcc -O3 -march=x86-64-v3 -fopt-info-vec-optimized-missed -U_attr_finite_math
-U_attr_finite_math_helper

The inner loop is vectorized. epsilon_helper_() is completely inlined.

3. gcc -O3 -march=x86-64-v3 -fopt-info-vec-optimized-missed
-D_attr_finite_math= -U_attr_finite_math_helper

The inner loop is vectorized. epsilon_helper_() is NOT completely inlined.


Vectorization requires finite math. OK, fair enough.

But then, unless the callers are annotated as well, the attribute will prevent
inlining. This is similar to bug 96505.

It is understandable that not every option that can be put in "optimize" is
compatible with inlining. However, it should be possible to assert that, within
a given region of code, the compiler can assume finite-math-only and
no-signed-zeros. In particular, it should be possible to create vectorized
specializations for all 6 variants, with the helper inlined into each of them.


#include <math.h>
#include <stdint.h>
#include <stdlib.h>
/* Type used for the number of coordinates stored per point. */
typedef uint_fast8_t dimension_t;

/* Selects how epsilon_helper_() orients each per-coordinate comparison. */
enum objs_agree_t { AGREE_MINIMISE = -1, AGREE_NONE = 0, AGREE_MAXIMISE = 1 };

#define MAX(a,b) fmax(a,b)
#define MIN(a,b) fmin(a,b)

/*
 * Attribute placed on the public epsilon_* entry points.  Compile with
 * -D_attr_finite_math= to build them without the attribute.
 *
 * A #define ends at the first unescaped newline, so the long definition is
 * continued with a backslash rather than simply wrapped.
 */
#ifndef _attr_finite_math
#define _attr_finite_math \
    __attribute__((optimize("no-signed-zeros", "finite-math-only")))
#endif

/*
 * Attribute placed on epsilon_helper_().  Compile with
 * -D_attr_finite_math_helper= to build the helper without the attribute.
 */
#ifndef _attr_finite_math_helper
#define _attr_finite_math_helper \
    __attribute__((optimize("no-signed-zeros", "finite-math-only")))
#endif

/* Ratio a / b when do_ratio is true, difference a - b otherwise. */
#define eps_value_(do_ratio, a, b) ((do_ratio) ? (a) / (b) : (a) - (b))

/*
 * Shared kernel behind the six epsilon_* entry points.
 *
 * points_a holds size_a points and points_b holds size_b points, each stored
 * as dim consecutive doubles.  For every point of points_b the function takes
 * the minimum, over all points of points_a, of the maximum over the
 * coordinates of the per-coordinate eps_value_(); it returns the maximum of
 * those minima.
 *
 * do_mult - true: eps_value_() is a ratio and the result starts at 0;
 *           false: eps_value_() is a difference and the result starts at
 *           -INFINITY.
 * agree   - AGREE_NONE: each coordinate is eps_value_(pb, pa) multiplied by
 *           minmax[d]; AGREE_MINIMISE: eps_value_(pa, pb); any other value:
 *           eps_value_(pb, pa).
 * minmax  - per-coordinate factors; only read when agree == AGREE_NONE.
 * dim     - coordinates per point.  Coordinates 0 and 1 are read
 *           unconditionally, so dim must be at least 2.
 *
 * When size_b is 0 the initial value is returned unchanged.
 */
_attr_finite_math_helper
static inline double
epsilon_helper_(bool do_mult, const enum objs_agree_t agree,
                const signed char * restrict minmax, dimension_t dim,
                const double * restrict points_a, size_t size_a,
                const double * restrict points_b, size_t size_b)
{
    double epsilon = do_mult ? 0 : -INFINITY;
    for (size_t b = 0; b < size_b; b++) {
        bool skip_max = false;
        double epsilon_min = INFINITY;
        const double * restrict pb = &points_b[b * dim];
        for (size_t a = 0; a < size_a; a++) {
            const double * restrict pa = &points_a[a * dim];
            /* Start the per-pair maximum from coordinates 0 and 1. */
            double epsilon_max = (agree == AGREE_NONE)
                ? MAX(minmax[0] * eps_value_(do_mult, pb[0], pa[0]),
                      minmax[1] * eps_value_(do_mult, pb[1], pa[1]))
                : ((agree == AGREE_MINIMISE)
                   ? MAX(eps_value_(do_mult, pa[0], pb[0]), eps_value_(do_mult,
pa[1], pb[1]))
                   : MAX(eps_value_(do_mult, pb[0], pa[0]), eps_value_(do_mult,
pb[1], pa[1])));

            /* The remaining coordinates can only raise epsilon_max, so this
               point of points_a cannot lower epsilon_min any further. */
            if (epsilon_max >= epsilon_min)
                continue;

            /* Fold in coordinates 2 .. dim-1. */
            for (dimension_t d = 2; d < dim; d++) {
                double epsilon_temp = (agree == AGREE_NONE)
                    ? minmax[d] * eps_value_(do_mult, pb[d], pa[d])
                    : ((agree == AGREE_MINIMISE)
                       ? eps_value_(do_mult, pa[d], pb[d])
                       : eps_value_(do_mult, pb[d], pa[d]));
                epsilon_max = MAX(epsilon_max, epsilon_temp);
            }

            /* The minimum for this b is now at most epsilon, so b cannot
               raise the result: stop scanning points_a for it. */
            if (epsilon_max <= epsilon) {
                skip_max = true;
                break;
            }
            epsilon_min = MIN(epsilon_min, epsilon_max);
        }
        if (skip_max) continue;
        epsilon = MAX(epsilon, epsilon_min);
    }
    return epsilon;
}

/*
 * Ratio-based variant (do_mult = true) with agree = AGREE_NONE: forwards
 * minmax and the two point sets to epsilon_helper_() and returns its result.
 */
_attr_finite_math
double
epsilon_mult_agree_none(const signed char * restrict minmax, dimension_t dim,
                        const double * restrict points_a, size_t size_a,
                        const double * restrict points_b, size_t size_b)
{
    return epsilon_helper_(/* do_mult=*/true, AGREE_NONE, minmax, dim,
points_a, size_a, points_b, size_b);
}

/*
 * Ratio-based variant (do_mult = true) with agree = AGREE_MINIMISE.  No
 * minmax array is needed, so NULL is passed to epsilon_helper_().
 */
_attr_finite_math
double
epsilon_mult_agree_min(dimension_t dim,
                        const double * restrict points_a, size_t size_a,
                        const double * restrict points_b, size_t size_b)
{
    return epsilon_helper_(/* do_mult=*/true, AGREE_MINIMISE, /*minmax=*/NULL,
dim, points_a, size_a, points_b, size_b);
}

/*
 * Ratio-based variant (do_mult = true) with agree = AGREE_MAXIMISE.  No
 * minmax array is needed, so NULL is passed to epsilon_helper_().
 */
_attr_finite_math
double
epsilon_mult_agree_max(dimension_t dim,
                        const double * restrict points_a, size_t size_a,
                        const double * restrict points_b, size_t size_b)
{
    return epsilon_helper_(/* do_mult=*/true, AGREE_MAXIMISE, /*minmax=*/NULL,
dim, points_a, size_a, points_b, size_b);
}


/*
 * Difference-based variant (do_mult = false) with agree = AGREE_NONE:
 * forwards minmax and the two point sets to epsilon_helper_() and returns its
 * result.
 */
_attr_finite_math
double
epsilon_addi_agree_none(const signed char * restrict minmax, dimension_t dim,
                        const double * restrict points_a, size_t size_a,
                        const double * restrict points_b, size_t size_b)
{
    return epsilon_helper_(/* do_mult=*/false, AGREE_NONE, minmax, dim,
points_a, size_a, points_b, size_b);
}

/*
 * Difference-based variant (do_mult = false) with agree = AGREE_MINIMISE.
 * No minmax array is needed, so NULL is passed to epsilon_helper_().
 */
_attr_finite_math
double
epsilon_addi_agree_min(dimension_t dim,
                        const double * restrict points_a, size_t size_a,
                        const double * restrict points_b, size_t size_b)
{
    return epsilon_helper_(/* do_mult=*/false, AGREE_MINIMISE, /*minmax=*/NULL,
dim, points_a, size_a, points_b, size_b);
}

/*
 * Difference-based variant (do_mult = false) with agree = AGREE_MAXIMISE.
 * No minmax array is needed, so NULL is passed to epsilon_helper_().
 */
_attr_finite_math
double
epsilon_addi_agree_max(dimension_t dim,
                        const double * restrict points_a, size_t size_a,
                        const double * restrict points_b, size_t size_b)
{
    return epsilon_helper_(/* do_mult=*/false, AGREE_MAXIMISE, /*minmax=*/NULL,
dim, points_a, size_a, points_b, size_b);
}

Reply via email to