https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107160
Kewen Lin <linkw at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |rguenth at gcc dot gnu.org,
                   |                            |rsandifo at gcc dot gnu.org

--- Comment #7 from Kewen Lin <linkw at gcc dot gnu.org> ---
One reduced test case is:

============================================================

#include <stdio.h>
#include <math.h>

#define N 128
float fl[N];

__attribute__ ((noipa, optimize (0))) void
init ()
{
  for (int i = 0; i < N; i++)
    fl[i] = i;
}

__attribute__ ((noipa)) float
foo (int n1)
{
  float sum0, sum1, sum2, sum3;
  sum0 = sum1 = sum2 = sum3 = 0.0f;

  int n = (n1 / 4) * 4;
  for (int i = 0; i < n; i += 4)
    {
      sum0 += fabs (fl[i]);
      sum1 += fabs (fl[i + 1]);
      sum2 += fabs (fl[i + 2]);
      sum3 += fabs (fl[i + 3]);
    }

  return sum0 + sum1 + sum2 + sum3;
}

__attribute__ ((optimize (0))) int
main ()
{
  init ();
  float res = foo (80);
  __builtin_printf ("res:%f\n", res);
  return 0;
}

============================================================

Incorrect result "res:670.000000" vs. expected result "res:3160.000000".

It looks like it exposes a bug in the vectorizer's reduction support. The
reduction epilogue handling looks wrong; it generates GIMPLE code like:

  # vect_sum3_31.16_101 = PHI <vect_sum3_31.16_97(3)>
  # vect_sum3_31.16_102 = PHI <vect_sum3_31.16_98(3)>
  # vect_sum3_31.16_103 = PHI <vect_sum3_31.16_99(3)>
  # vect_sum3_31.16_104 = PHI <vect_sum3_31.16_100(3)>
  _105 = BIT_FIELD_REF <vect_sum3_31.16_101, 32, 0>;
  _106 = BIT_FIELD_REF <vect_sum3_31.16_101, 32, 32>;
  _107 = BIT_FIELD_REF <vect_sum3_31.16_101, 32, 64>;
  _108 = BIT_FIELD_REF <vect_sum3_31.16_101, 32, 96>;
  _109 = BIT_FIELD_REF <vect_sum3_31.16_102, 32, 0>;
  _110 = BIT_FIELD_REF <vect_sum3_31.16_102, 32, 32>;
  _111 = BIT_FIELD_REF <vect_sum3_31.16_102, 32, 64>;
  _112 = BIT_FIELD_REF <vect_sum3_31.16_102, 32, 96>;
  ...

It doesn't consider that the reduced results vect_sum3_31.16_10{1,2,3,4} from
the loop can be reduced again in the loop exit block, as they are in the same
SLP group.