https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71300

            Bug ID: 71300
           Summary: Vector ABI bug for some AVX vectorized variants
           Product: gcc
           Version: 4.9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: andrew.n.senkevich at gmail dot com
  Target Milestone: ---

Hi,

According to the Vector ABI, the AVX ISA vectorized variant of

#pragma omp declare simd notinbranch
void callee(double, double*);

expects ymm0 filled with 4 doubles and ymm1 filled with 4 double* values.

But in reality the double* values are passed in xmm1 and xmm2, which leads to a
serious ABI incompatibility.

-bash-4.2$ cat test.cc

#pragma omp declare simd notinbranch
extern void callee(double a, double* b);

#define VLEN 4

typedef double __attribute__((vector_size(8 * VLEN))) vec;
vec x, r;

int main()
{
    for (int i = 0; i < VLEN; i++) x[i] = i;

    #pragma omp simd
    for (int i = 0; i < VLEN; i++) callee(x[i], &r[i]);

    return (int)r[VLEN-1];
}

g++ -O1 -fopenmp -ffast-math test.cc -mavx -c

-bash-4.2$ objdump -d test.o

test.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <main>:
   0:   4c 8d 54 24 08          lea    0x8(%rsp),%r10
   5:   48 83 e4 e0             and    $0xffffffffffffffe0,%rsp
   9:   41 ff 72 f8             pushq  -0x8(%r10)
   d:   55                      push   %rbp
   e:   48 89 e5                mov    %rsp,%rbp
  11:   41 52                   push   %r10
  13:   48 83 ec 28             sub    $0x28,%rsp
  17:   48 c7 05 00 00 00 00    movq   $0x0,0x0(%rip)        # 22 <main+0x22>
  1e:   00 00 00 00
  22:   c5 fb 10 1d 00 00 00    vmovsd 0x0(%rip),%xmm3        # 2a <main+0x2a>
  29:   00
  2a:   c5 fb 11 1d 00 00 00    vmovsd %xmm3,0x0(%rip)        # 32 <main+0x32>
  31:   00
  32:   c5 fb 10 25 00 00 00    vmovsd 0x0(%rip),%xmm4        # 3a <main+0x3a>
  39:   00
  3a:   c5 fb 11 25 00 00 00    vmovsd %xmm4,0x0(%rip)        # 42 <main+0x42>
  41:   00
  42:   c5 fb 10 2d 00 00 00    vmovsd 0x0(%rip),%xmm5        # 4a <main+0x4a>
  49:   00
  4a:   c5 fb 11 2d 00 00 00    vmovsd %xmm5,0x0(%rip)        # 52 <main+0x52>
  51:   00
  52:   c5 fb 12 0d 00 00 00    vmovddup 0x0(%rip),%xmm1        # 5a
<main+0x5a>
  59:   00
  5a:   c5 f9 28 3d 00 00 00    vmovapd 0x0(%rip),%xmm7        # 62 <main+0x62>
  61:   00
  62:   c5 f8 29 7d d0          vmovaps %xmm7,-0x30(%rbp)
  67:   c5 f9 28 05 00 00 00    vmovapd 0x0(%rip),%xmm0        # 6f <main+0x6f>
  6e:   00
  6f:   c5 f8 29 45 e0          vmovaps %xmm0,-0x20(%rbp)
  74:   c5 f1 d4 15 00 00 00    vpaddq 0x0(%rip),%xmm1,%xmm2        # 7c
<main+0x7c>
  7b:   00
  7c:   c5 f1 d4 0d 00 00 00    vpaddq 0x0(%rip),%xmm1,%xmm1        # 84
<main+0x84>
  83:   00
  84:   c5 fd 28 45 d0          vmovapd -0x30(%rbp),%ymm0
  89:   e8 00 00 00 00          callq  8e <main+0x8e>
  8e:   c5 fb 2c 05 00 00 00    vcvttsd2si 0x0(%rip),%eax        # 96
<main+0x96>
  95:   00
  96:   48 83 c4 28             add    $0x28,%rsp
  9a:   41 5a                   pop    %r10
  9c:   5d                      pop    %rbp
  9d:   49 8d 62 f8             lea    -0x8(%r10),%rsp
  a1:   c3                      retq

Reply via email to