https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92420

            Bug ID: 92420
           Summary: [7/8/9/10 Regression] Vectorization miscompilation
                    with negative strides since r238039
           Product: gcc
           Version: 10.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: jakub at gcc dot gnu.org
  Target Milestone: ---

The following testcase is miscompiled on x86_64-linux, e.g. with -O3 -mavx2 or
-O3 -mssse3 since r238039:

/* Element count of each global array below; main also passes it as the
   trip count w to foo and bar.  */
#define N 16
/* Pair of ints.  foo and bar combine r and i in the shape of a complex
   multiplication, so presumably r is the real part and i the imaginary
   part.  */
struct C { int r, i; };
/* a, b and c are inputs filled in by main; d receives foo's results and
   e receives bar's results.  */
struct C a[N], b[N], c[N], d[N], e[N];

/* For each i in [0, w), store into z[i] the complex-style product of x[i]
   and y[-1 - i]:
     z[i].r = x[i].r * y[-1 - i].r - x[i].i * y[-1 - i].i
     z[i].i = x[i].i * y[-1 - i].r + x[i].r * y[-1 - i].i
   x and z are walked forwards while y is walked backwards, starting at the
   element just before y, so the caller must make y[-w] .. y[-1] valid
   (main passes b + N).  This is the negative-stride access the report is
   about.
   noipa disables interprocedural optimization between this function and
   its callers, so the loop is compiled on its own.  */
__attribute__((noipa)) static void
foo (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z,
int w)
{
  int i;  /* Unused: shadowed by the i declared in the for statement.  */
  for (int i = 0; i < w; i++)
    {
      /* y[-1 - i] is the i-th element counting back from the one before y.  */
      z[i].r = x[i].r * y[-1 - i].r - x[i].i * y[-1 - i].i;
      z[i].i = x[i].i * y[-1 - i].r + x[i].r * y[-1 - i].i;
    }
}

/* Forward-stride counterpart of foo: for each i in [0, w), store into z[i]
   the complex-style product of x[i] and y[i]:
     z[i].r = x[i].r * y[i].r - x[i].i * y[i].i
     z[i].i = x[i].i * y[i].r + x[i].r * y[i].i
   All three pointers are walked forwards.  main uses the results as the
   reference values that foo's output is compared against.
   noipa disables interprocedural optimization between this function and
   its callers, so the loop is compiled on its own.  */
__attribute__((noipa)) static void
bar (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z,
int w)
{
  int i;  /* Unused: shadowed by the i declared in the for statement.  */
  for (int i = 0; i < w; i++)
    {
      z[i].r = x[i].r * y[i].r - x[i].i * y[i].i;
      z[i].i = x[i].i * y[i].r + x[i].r * y[i].i;
    }
}

/* Test driver: fill the inputs, run the backward-stride loop (foo) and the
   forward-stride loop (bar) on equivalent data, and abort if any element of
   the two results differs.  Returns 0 when they all match.  */
int
main ()
{
  int i;
  for (i = 0; i < N; ++i)
    {
      a[i].r = N - i; a[i].i = i - N;
      b[i].r = i - N; b[i].i = i + N;
      /* c is b reversed: b[N - 1 - i].r == -1 - i and
	 b[N - 1 - i].i == 2 * N - 1 - i, so c[i] == b[N - 1 - i].  */
      c[i].r = -1 - i; c[i].i = 2 * N - 1 - i;
    }
  /* foo reads (b + N)[-1 - i], i.e. b[N - 1 - i], which equals c[i]; d and e
     must therefore end up identical.  */
  foo (a, b + N, d, N);
  bar (a, c, e, N);
  for (i = 0; i < N; ++i)
    if (d[i].r != e[i].r || d[i].i != e[i].i)
      __builtin_abort ();
  return 0;
}

In bar, which looks correct, it is:
  vect__6.87_69 = MEM[base: y_21(D), index: ivtmp.133_9, offset: 0B];
  vect__6.88_70 = VEC_PERM_EXPR <vect__6.87_69, vect__6.87_69, { 0, 0, 2, 2, 4,
4, 6, 6 }>;
  vect__4.84_66 = MEM[base: x_20(D), index: ivtmp.133_9, offset: 0B];
  vect__4.93_75 = VEC_PERM_EXPR <vect__4.84_66, vect__4.84_66, { 1, 0, 3, 2, 5,
4, 7, 6 }>;
  vect__6.97_79 = VEC_PERM_EXPR <vect__6.87_69, vect__6.87_69, { 1, 1, 3, 3, 5,
5, 7, 7 }>;
  vect__7.89_71 = vect__4.84_66 * vect__6.88_70;
  vect__10.98_80 = vect__4.93_75 * vect__6.97_79;
  vect__12.99_81 = vect__7.89_71 - vect__10.98_80;
  vect__12.100_82 = vect__7.89_71 + vect__10.98_80;
  _83 = VEC_PERM_EXPR <vect__12.99_81, vect__12.100_82, { 0, 9, 2, 11, 4, 13,
6, 15 }>;
  MEM[base: z_22(D), index: ivtmp.133_9, offset: 0B] = _83;
foo has the y pointer iterating with step -8 rather than 8, so I'd expect the
x-related permutations to stay, and for y to start with y_21(D) - 32B and use
the { 7, 7, 5, 5, 3, 3, 1, 1 } and { 6, 6, 4, 4, 2, 2, 0, 0 } permutations,
but we actually emit instead:
  _34 = (void *) ivtmp.64_69;
  vect__9.16_80 = MEM[base: _34, offset: 0B];
  vect__9.17_81 = VEC_PERM_EXPR <vect__9.16_80, vect__9.16_80, { 0, 0, 2, 2, 4,
4, 6, 6 }>;
  vect__4.13_76 = MEM[base: x_23(D), index: ivtmp.62_71, offset: 0B];
  vect__4.22_86 = VEC_PERM_EXPR <vect__4.13_76, vect__4.13_76, { 1, 0, 3, 2, 5,
4, 7, 6 }>;
  vect__12.25_90 = MEM[base: _34, offset: 4B];
  vect__12.26_91 = VEC_PERM_EXPR <vect__12.25_90, vect__12.25_90, { 0, 0, 2, 2,
4, 4, 6, 6 }>;
  vect__10.18_82 = vect__4.13_76 * vect__9.17_81;
  vect__13.27_92 = vect__4.22_86 * vect__12.26_91;
  vect__15.28_93 = vect__10.18_82 - vect__13.27_92;
  vect__15.29_94 = vect__10.18_82 + vect__13.27_92;
  _95 = VEC_PERM_EXPR <vect__15.28_93, vect__15.29_94, { 0, 9, 2, 11, 4, 13, 6,
15 }>;
  MEM[base: z_25(D), index: ivtmp.62_71, offset: 0B] = _95;
where ivtmp.64_69 starts at y_21(D) - 8 (!) and has step -32.

Reply via email to