http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53366

Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2012-05-16
                 CC|                            |jakub at gcc dot gnu.org
     Ever Confirmed|0                           |1

--- Comment #2 from Jakub Jelinek <jakub at gcc dot gnu.org> 2012-05-16 
09:30:07 UTC ---
Well, that is just the first revision that actually vectorizes the loop, so it
may be either a vectorizer bug or a backend bug.

Here is a slightly cleaned up testcase, for -O3 -mavx (with -O3 -msse4 it
doesn't get vectorized and therefore doesn't fail):

/* Three doubles; used below both as a set of per-component scale factors
   and as an accumulator.  */
struct S { double v[3]; };
/* A pair of struct S: foo stores values derived from real parts in R and
   values derived from imaginary parts in I.  */
struct T { struct S r, i; };
/* Five struct T entries, indexed by the inner J loop of foo.  */
struct U { struct T j[5]; };

/* Accumulate scaled real/imaginary parts of X into *P1 and *P2.

   Starting from the passed-in Y and while Y < Z - 2, each outer iteration
   reads four consecutive groups of five complex values from X (groups Y,
   Y + 1, Y + 2 and Y + 3) and then advances Y by 4.  Groups Y and Y + 2
   feed *P1, scaled by L2 and L4 respectively; groups Y + 1 and Y + 3 feed
   *P2, scaled by L3 and L1 respectively.  Real parts are added to the .r
   member and imaginary parts to the .i member.  Nothing is returned; the
   products are added to whatever *P1 and *P2 already contain.

   The highest index read from X for a given Y is 5 * (Y + 3) + 4; the
   function itself does no bounds checking.

   NOTE(review): this is a reduced compiler testcase, so the statement
   order and loop structure are presumably significant for reproducing
   the failure -- do not restructure.  */
void
foo (struct U *__restrict p1, struct U *__restrict p2,
     struct S l1, struct S l2, struct S l3, struct S l4,
     const double _Complex * __restrict x, int y, int z)
{
  int i, j;
  while (y < z - 2)
    {
      for (j = 0; j < 5; ++j)
        {
          /* a/b: real/imag of group Y; c/d: group Y + 2 (both go to *P1).  */
          double a = __builtin_creal (x[5 * y + j]);
          double b = __builtin_cimag (x[5 * y + j]);
          double c = __builtin_creal (x[5 * (y + 2) + j]);
          double d = __builtin_cimag (x[5 * (y + 2) + j]);
          /* e/f: real/imag of group Y + 1; g/h: group Y + 3 (both go to
             *P2).  */
          double e = __builtin_creal (x[5 * (y + 1) + j]);
          double f = __builtin_cimag (x[5 * (y + 1) + j]);
          double g = __builtin_creal (x[5 * (y + 3) + j]);
          double h = __builtin_cimag (x[5 * (y + 3) + j]);
          /* Each accumulator element is updated twice per iteration, once
             per contributing group.  */
          for (i = 0; i < 3; ++i)
            {
              p1->j[j].r.v[i] += l2.v[i] * a;
              p1->j[j].r.v[i] += l4.v[i] * c;
              p1->j[j].i.v[i] += l2.v[i] * b;
              p1->j[j].i.v[i] += l4.v[i] * d;
              p2->j[j].r.v[i] += l3.v[i] * e;
              p2->j[j].r.v[i] += l1.v[i] * g;
              p2->j[j].i.v[i] += l3.v[i] * f;
              p2->j[j].i.v[i] += l1.v[i] * h;
            }
        }
      /* Four groups were consumed above.  */
      y += 4;
    }
}

/* Input data for foo; filled in by main.  5005 elements covers the highest
   index foo reads for the call in main (5 * (997 + 3) + 4 == 5004).  */
_Complex double x[5005];
/* Accumulators passed to foo.  As file-scope objects they start out
   zero-initialized.  */
struct U p1, p2;

/* Test driver: fill X with x[i] = i + 2*i*I, set the four scale vectors to
   the constants 1, 2, 3 and 4, run foo with Y = 5 and Z = 1000, and abort
   if any accumulated value differs from the expected result.  Returns 0 on
   success.  */
int
main ()
{
  int i, j;
  struct S l1, l2, l3, l4;
  /* The imaginary part of every element is exactly twice its real part;
     the checks below rely on that.  1.0iF is an imaginary constant (GNU
     extension).  */
  for (i = 0; i < 5005; ++i)
    x[i] = i + 1.0iF * (2 * i);
  for (i = 0; i < 3; ++i)
    {
      l1.v[i] = 1;
      l2.v[i] = 2;
      l3.v[i] = 3;
      l4.v[i] = 4;
    }
  /* With Y = 5 and Z = 1000 the outer loop in foo runs for
     Y = 5, 9, ..., 997, i.e. 249 times.  */
  foo (&p1, &p2, l1, l2, l3, l4, x, 5, 1000);
  /* Expected real sums: p1 adds 2 * (5y + j) + 4 * (5y + 10 + j) and p2
     adds 3 * (5y + 5 + j) + (5y + 15 + j) per outer iteration, giving
     3752430 + 1494 * j and 2502450 + 996 * j in total.  The imaginary
     sums must be exactly double the real ones.  */
  for (j = 0; j < 5; ++j)
    for (i = 0; i < 3; ++i)
      if (p1.j[j].r.v[i] != 3752430 + j * 1494
          || p1.j[j].i.v[i] != p1.j[j].r.v[i] * 2
          || p2.j[j].r.v[i] != 2502450 + j * 996
          || p2.j[j].i.v[i] != p2.j[j].r.v[i] * 2)
        __builtin_abort ();
  return 0;
}

Reply via email to