Bug ID: 59501
           Summary: Vector Gather with GCC 4.9 2013-12-08 Snapshot
           Product: gcc
           Version: 4.9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
           Assignee: unassigned at gcc dot gnu.org
          Reporter: freddie at witherden dot org

Compiling the following snippet with the 2013-12-08 snapshot of GCC 4.9:

    typedef double v4d __attribute__((vector_size(32)));

    v4d gather(double *base, unsigned *offt)
    {
        v4d tmp = { base[offt[0]], base[offt[1]], base[offt[2]],  base[offt[3]] };
        return tmp;
    }

with flags: -std=c++11 -Ofast -march=core-avx2 emits the following ASM:

0000000000000000 <_Z6gatherPdPj>:
   0:   8b 16                   mov    (%rsi),%edx
   2:   4c 8d 54 24 08          lea    0x8(%rsp),%r10
   7:   48 83 e4 e0             and    $0xffffffffffffffe0,%rsp
   b:   44 8b 46 08             mov    0x8(%rsi),%r8d
   f:   41 ff 72 f8             pushq  -0x8(%r10)
  13:   55                      push   %rbp
  14:   8b 46 04                mov    0x4(%rsi),%eax
  17:   48 89 e5                mov    %rsp,%rbp
  1a:   8b 4e 0c                mov    0xc(%rsi),%ecx
  1d:   41 52                   push   %r10
  1f:   41 5a                   pop    %r10
  21:   c4 a1 7b 10 14 c7       vmovsd (%rdi,%r8,8),%xmm2
  27:   c5 fb 10 1c d7          vmovsd (%rdi,%rdx,8),%xmm3
  2c:   c5 e9 16 0c cf          vmovhpd (%rdi,%rcx,8),%xmm2,%xmm1
  31:   5d                      pop    %rbp
  32:   c5 e1 16 04 c7          vmovhpd (%rdi,%rax,8),%xmm3,%xmm0
  37:   c4 e3 7d 18 c1 01       vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
  3d:   49 8d 62 f8             lea    -0x8(%r10),%rsp
  41:   c3                      retq   

which appears to be a regression when compared with 4.8.2:

0000000000000000 <_Z6gatherPdPj>:
   0:   8b 16                   mov    (%rsi),%edx
   2:   44 8b 46 08             mov    0x8(%rsi),%r8d
   6:   8b 46 04                mov    0x4(%rsi),%eax
   9:   8b 4e 0c                mov    0xc(%rsi),%ecx
   c:   c5 fb 10 1c d7          vmovsd (%rdi,%rdx,8),%xmm3
  11:   c4 a1 7b 10 14 c7       vmovsd (%rdi,%r8,8),%xmm2
  17:   c5 e1 16 0c c7          vmovhpd (%rdi,%rax,8),%xmm3,%xmm1
  1c:   c5 e9 16 04 cf          vmovhpd (%rdi,%rcx,8),%xmm2,%xmm0
  21:   c4 e3 75 18 c0 01       vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
  27:   c3                      retq

