http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53366
Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2012-05-16
                 CC|                            |jakub at gcc dot gnu.org
     Ever Confirmed|0                           |1

--- Comment #2 from Jakub Jelinek <jakub at gcc dot gnu.org> 2012-05-16 09:30:07 UTC ---
Well, that is just the first revision that actually vectorizes the loop, so it
may be either a vectorizer bug or a backend bug.

Here is a slightly cleaned up testcase, for -O3 -mavx (with -O3 -msse4 it
doesn't get vectorized and therefore doesn't fail):

struct S { double v[3]; };
struct T { struct S r, i; };
struct U { struct T j[5]; };

void
foo (struct U *__restrict p1, struct U *__restrict p2,
     struct S l1, struct S l2, struct S l3, struct S l4,
     const double _Complex * __restrict x, int y, int z)
{
  int i, j;
  while (y < z - 2)
    {
      for (j = 0; j < 5; ++j)
        {
          double a = __builtin_creal (x[5 * y + j]);
          double b = __builtin_cimag (x[5 * y + j]);
          double c = __builtin_creal (x[5 * (y + 2) + j]);
          double d = __builtin_cimag (x[5 * (y + 2) + j]);
          double e = __builtin_creal (x[5 * (y + 1) + j]);
          double f = __builtin_cimag (x[5 * (y + 1) + j]);
          double g = __builtin_creal (x[5 * (y + 3) + j]);
          double h = __builtin_cimag (x[5 * (y + 3) + j]);
          for (i = 0; i < 3; ++i)
            {
              p1->j[j].r.v[i] += l2.v[i] * a;
              p1->j[j].r.v[i] += l4.v[i] * c;
              p1->j[j].i.v[i] += l2.v[i] * b;
              p1->j[j].i.v[i] += l4.v[i] * d;
              p2->j[j].r.v[i] += l3.v[i] * e;
              p2->j[j].r.v[i] += l1.v[i] * g;
              p2->j[j].i.v[i] += l3.v[i] * f;
              p2->j[j].i.v[i] += l1.v[i] * h;
            }
        }
      y += 4;
    }
}

_Complex double x[5005];
struct U p1, p2;

int
main ()
{
  int i, j;
  struct S l1, l2, l3, l4;
  for (i = 0; i < 5005; ++i)
    x[i] = i + 1.0iF * (2 * i);
  for (i = 0; i < 3; ++i)
    {
      l1.v[i] = 1;
      l2.v[i] = 2;
      l3.v[i] = 3;
      l4.v[i] = 4;
    }
  foo (&p1, &p2, l1, l2, l3, l4, x, 5, 1000);
  for (j = 0; j < 5; ++j)
    for (i = 0; i < 3; ++i)
      if (p1.j[j].r.v[i] != 3752430 + j * 1494
          || p1.j[j].i.v[i] != p1.j[j].r.v[i] * 2
          || p2.j[j].r.v[i] != 2502450 + j * 996
          || p2.j[j].i.v[i] != p2.j[j].r.v[i] * 2)
        __builtin_abort ();
  return 0;
}