https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92420
Bug ID: 92420
Summary: [7/8/9/10 Regression] Vectorization miscompilation with negative strides since r238039
Product: gcc
Version: 10.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: jakub at gcc dot gnu.org
Target Milestone: ---

The following testcase is miscompiled on x86_64-linux, e.g. with -O3 -mavx2 or -O3 -mssse3 since r238039:

#define N 16
struct C { int r, i; };
struct C a[N], b[N], c[N], d[N], e[N];

__attribute__((noipa)) static void
foo (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z, int w)
{
  int i;
  for (int i = 0; i < w; i++)
    {
      z[i].r = x[i].r * y[-1 - i].r - x[i].i * y[-1 - i].i;
      z[i].i = x[i].i * y[-1 - i].r + x[i].r * y[-1 - i].i;
    }
}

__attribute__((noipa)) static void
bar (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z, int w)
{
  int i;
  for (int i = 0; i < w; i++)
    {
      z[i].r = x[i].r * y[i].r - x[i].i * y[i].i;
      z[i].i = x[i].i * y[i].r + x[i].r * y[i].i;
    }
}

int
main ()
{
  int i;
  for (i = 0; i < N; ++i)
    {
      a[i].r = N - i;
      a[i].i = i - N;
      b[i].r = i - N;
      b[i].i = i + N;
      c[i].r = -1 - i;
      c[i].i = 2 * N - 1 - i;
    }
  foo (a, b + N, d, N);
  bar (a, c, e, N);
  for (i = 0; i < N; ++i)
    if (d[i].r != e[i].r || d[i].i != e[i].i)
      __builtin_abort ();
  return 0;
}

In bar, which looks correct, it is:

  vect__6.87_69 = MEM[base: y_21(D), index: ivtmp.133_9, offset: 0B];
  vect__6.88_70 = VEC_PERM_EXPR <vect__6.87_69, vect__6.87_69, { 0, 0, 2, 2, 4, 4, 6, 6 }>;
  vect__4.84_66 = MEM[base: x_20(D), index: ivtmp.133_9, offset: 0B];
  vect__4.93_75 = VEC_PERM_EXPR <vect__4.84_66, vect__4.84_66, { 1, 0, 3, 2, 5, 4, 7, 6 }>;
  vect__6.97_79 = VEC_PERM_EXPR <vect__6.87_69, vect__6.87_69, { 1, 1, 3, 3, 5, 5, 7, 7 }>;
  vect__7.89_71 = vect__4.84_66 * vect__6.88_70;
  vect__10.98_80 = vect__4.93_75 * vect__6.97_79;
  vect__12.99_81 = vect__7.89_71 - vect__10.98_80;
  vect__12.100_82 = vect__7.89_71 + vect__10.98_80;
  _83 = VEC_PERM_EXPR <vect__12.99_81, vect__12.100_82, { 0, 9, 2, 11, 4, 13, 6, 15 }>;
  MEM[base: z_22(D), index: ivtmp.133_9, offset: 0B] = _83;

foo has the y pointer iterating with -8 step rather than 8, so I'd expect the x-related permutations to stay and for y to start with y_21(D) - 32B and use { 7, 7, 5, 5, 3, 3, 1, 1 } and { 6, 6, 4, 4, 2, 2, 0, 0 } permutations, but we actually emit instead:

  _34 = (void *) ivtmp.64_69;
  vect__9.16_80 = MEM[base: _34, offset: 0B];
  vect__9.17_81 = VEC_PERM_EXPR <vect__9.16_80, vect__9.16_80, { 0, 0, 2, 2, 4, 4, 6, 6 }>;
  vect__4.13_76 = MEM[base: x_23(D), index: ivtmp.62_71, offset: 0B];
  vect__4.22_86 = VEC_PERM_EXPR <vect__4.13_76, vect__4.13_76, { 1, 0, 3, 2, 5, 4, 7, 6 }>;
  vect__12.25_90 = MEM[base: _34, offset: 4B];
  vect__12.26_91 = VEC_PERM_EXPR <vect__12.25_90, vect__12.25_90, { 0, 0, 2, 2, 4, 4, 6, 6 }>;
  vect__10.18_82 = vect__4.13_76 * vect__9.17_81;
  vect__13.27_92 = vect__4.22_86 * vect__12.26_91;
  vect__15.28_93 = vect__10.18_82 - vect__13.27_92;
  vect__15.29_94 = vect__10.18_82 + vect__13.27_92;
  _95 = VEC_PERM_EXPR <vect__15.28_93, vect__15.29_94, { 0, 9, 2, 11, 4, 13, 6, 15 }>;
  MEM[base: z_25(D), index: ivtmp.62_71, offset: 0B] = _95;

where ivtmp.64_69 starts at y_21(D) - 8 (!) and with step -32.