https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89992

            Bug ID: 89992
           Summary: Vectorizer is very sensitive to function calls
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hjl.tools at gmail dot com
  Target Milestone: ---

[hjl@gnu-cfl-1 xxx]$ cat x.c
static __inline unsigned int /* Execute CPUID with EAX = __ext and return the resulting EAX.  */
__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
{
  unsigned int __eax, __ebx, __ecx, __edx; /* __ecx and __edx are written by the asm but not read.  */
  __asm__ ("cpuid\n\t" : "=a" (__eax), "=b" (__ebx), "=c" (__ecx), "=d" (__edx)
: "0" (__ext));

  if (__sig) /* __sig is optional; when non-null it receives the EBX output.  */
    *__sig = __ebx;

  return __eax;
}

static __inline int /* Run CPUID for __leaf/__subleaf into the four outputs; return 1, or 0 if __leaf is out of range.  */
__get_cpuid_count (unsigned int __leaf, unsigned int __subleaf,
     unsigned int *__eax, unsigned int *__ebx,
     unsigned int *__ecx, unsigned int *__edx)
{
  unsigned int __ext = __leaf & 0x80000000; /* Keep only bit 31 of the requested leaf.  */
  unsigned int __maxlevel = __get_cpuid_max (__ext, 0); /* Null __sig: only the returned EAX is used.  */

  if (__maxlevel == 0 || __maxlevel < __leaf) /* Outputs are left unwritten on this path.  */
    return 0;

  __asm__ ("cpuid\n\t" : "=a" (*__eax), "=b" (*__ebx), "=c" (*__ecx), "=d"
(*__edx) : "0" (__leaf), "2" (__subleaf)); /* Inputs: __leaf in EAX, __subleaf in ECX.  */
  return 1;
}
static int /* Return 1 if XGETBV with ECX = 0 reports every bit of `mask' set in EAX, else 0.  */
avx512f_os_support (void)
{
  unsigned int eax, edx; /* edx is written by the asm but not read.  */
  unsigned int ecx = 0x0;
  unsigned int mask = 0x2 | 0x4 | 0x20 | 0x40 | 0x80; /* NOTE(review): presumably the XCR0 SSE/AVX/AVX-512 state bits -- confirm against the Intel SDM.  */

  __asm__ ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (ecx));

  return ((eax & mask) == mask);
}

int foo (void); /* Declared only; no definition appears in this listing.  */

extern void abort (void); /* Declared by hand; the listing includes no headers.  */
static void do_test (void); /* Forward declaration so main can call it before its definition.  */

int /* Run do_test when foo and avx512f_os_support both return nonzero; always returns 0.  */
main ()
{
  unsigned int eax, ebx, ecx, edx; /* Written only by the CPUID query below; never read afterwards.  */

#ifndef WORK
  if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx)) /* Exit early when CPUID leaf 7 is out of range.  */
    return 0;
#endif /* !WORK */

  if (foo () && avx512f_os_support ()) /* && short-circuits: the XGETBV check runs only if foo returns nonzero.  */
    {
      do_test ();
      return 0;
    }

  return 0;
}


float a[16] = {-0.1f, -3.2f, -6.3f, -9.4f, /* Values multiplied in do_test.  */
   -12.5f, -15.6f, -18.7f, -21.8f,
   24.9f, 27.1f, 30.2f, 33.3f,
   36.4f, 39.5f, 42.6f, 45.7f};
float b[16] = {-1.2f, 3.4f, -5.6f, 7.8f, /* Only the sign of each element is used by do_test.  */
   -9.0f, 1.0f, -2.0f, 3.0f,
   -4.0f, -5.0f, 6.0f, 7.0f,
   -8.0f, -9.0f, 10.0f, 11.0f};
float r[16]; /* Results stored by do_test.  */

static void /* Fill r[] from a[] and b[], then abort if any element differs from a recomputed value.  */
do_test (void)
{
  int i;

  for (i = 0; i < 16; i++) /* r[i] = a[i] times +1.0f or -1.0f, taking the sign from b[i].  */
    r[i] = a[i] * __builtin_copysignf (1.0f, b[i]);

  for (i = 0; i < 16; i++) /* Check loop: recompute each element and compare with the stored one.  */
    if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i]))
      abort ();
}
[hjl@gnu-cfl-1 xxx]$ make
/export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/ -mavx512f
-mavx512vl -O2 -ftree-vectorize -fdump-tree-vect-details  -S x.c
[hjl@gnu-cfl-1 xxx]$  grep vectorized x.c.158t.vect
x.c:45:1: note: vectorized 0 loops in function.
[hjl@gnu-cfl-1 xxx]$
/export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/ -mavx512f
-mavx512vl -O2 -ftree-vectorize -fdump-tree-vect-details  -S x.c -DWORK
[hjl@gnu-cfl-1 xxx]$  grep vectorized x.c.158t.vect
x.c:83:10: missed:   not vectorized: control flow in loop.
x.c:79:3: note:   === vect_mark_stmts_to_be_vectorized ===
x.c:79:3: optimized: loop vectorized using 64 byte vectors
x.c:45:1: note: vectorized 1 loops in function.
[hjl@gnu-cfl-1 xxx]$ 

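The vectorizer doesn't kick in when there are a couple of function calls: without -DWORK (i.e. with the __get_cpuid_count call in main), 0 loops are vectorized; with -DWORK, 1 loop is vectorized.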

Reply via email to