http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54716



--- Comment #7 from Jakub Jelinek <jakub at gcc dot gnu.org> 2012-09-26 
15:40:53 UTC ---

On

/* Shorthand for the GCC vector_size attribute: a type declared with
   `vector T` is a 16-byte vector of T elements.  */
#define vector __attribute__ ((vector_size (16)))



/* Testcase kernel: reinterpret F as a vector of ints, XOR it with a
   constant mask, reinterpret the result as a vector of floats and add H.

   The mask has 0x80000000 in elements 0 and 2 and zero in elements 1
   and 3.  NOTE(review): on targets with 32-bit int and IEEE-754 single
   precision float this should flip the sign of elements 0 and 2 of F --
   confirm for the target under test.

   noinline/noclone ask GCC not to inline or clone the function, so it is
   compiled as a stand-alone function taking both operands as arguments.  */
__attribute__((noinline, noclone))

vector float foo(vector float f, vector float h)

{

  /* XOR mask applied to the integer view of F.  */
  vector int g = { 0x80000000, 0, 0x80000000, 0 };

  /* Vector cast between same-sized vector types: reinterprets the bits,
     no value conversion.  */
  vector int f_int = (vector int) f;

  /* Integer XOR, then back to the float view for the floating-point add.  */
  return ((vector float) (f_int ^ g)) + h;

}



/* File-scope operands read by main (a, b, c) and the result vector r
   written by main; r has no explicit initializer.  */
vector float a = { 1.0, 2.0, 3.0, 4.0 }, b = { 5.0, 6.0, 7.0, 8.0 }, c = { 9.0,

10.0, 11.0, 12.0 }, r;



/* Benchmark driver: calls foo one billion times on values derived from
   the globals a, b and c, storing each result in r.  Always returns 0.  */
int

main ()

{

  int i;

  for (i = 0; i < 1000000000; i++)

    {

      /* Empty asm with a "memory" clobber.  NOTE(review): presumably here
         so the compiler cannot cache the globals across iterations or
         drop the store to r -- confirm intent.  */
      asm volatile ("" : : : "memory");

      r = foo(a + b, a + c) - a;

      /* Second barrier, after the store to r.  */
      asm volatile ("" : : : "memory");

    }

  return 0;

}



I haven't noticed a measurable performance difference on an Intel SNB 2600

CPU though, so perhaps the patch isn't needed.

Reply via email to