Hi! These instructions are also available in AVX512VL, so their patterns can use the XMM16+ registers.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2016-05-18 Jakub Jelinek <ja...@redhat.com> * config/i386/sse.md (avx2_vec_dupv4df): Use v instead of x constraint, use maybe_evex prefix instead of vex. (vec_dupv4sf): Use v constraint instead of x for output operand except for noavx alternative, use Yv constraint instead of x for input. Use maybe_evex prefix instead of vex. (*vec_dupv4si): Likewise. (*vec_dupv2di): Likewise. * gcc.target/i386/avx512vl-vbroadcast-1.c: New test. --- gcc/config/i386/sse.md.jj 2016-05-18 11:24:21.000000000 +0200 +++ gcc/config/i386/sse.md 2016-05-18 12:30:50.929220572 +0200 @@ -16880,15 +16880,15 @@ (define_insn "avx2_permv2ti" (set_attr "mode" "OI")]) (define_insn "avx2_vec_dupv4df" - [(set (match_operand:V4DF 0 "register_operand" "=x") + [(set (match_operand:V4DF 0 "register_operand" "=v") (vec_duplicate:V4DF (vec_select:DF - (match_operand:V2DF 1 "register_operand" "x") + (match_operand:V2DF 1 "register_operand" "v") (parallel [(const_int 0)]))))] "TARGET_AVX2" "vbroadcastsd\t{%1, %0|%0, %1}" [(set_attr "type" "sselog1") - (set_attr "prefix" "vex") + (set_attr "prefix" "maybe_evex") (set_attr "mode" "V4DF")]) (define_insn "<avx512>_vec_dup<mode>_1" @@ -16991,9 +16991,9 @@ (define_insn "<mask_codefor><avx512>_vec (const_int 1)))]) (define_insn "vec_dupv4sf" - [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") + [(set (match_operand:V4SF 0 "register_operand" "=v,v,x") (vec_duplicate:V4SF - (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))] + (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))] "TARGET_SSE" "@ vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0} @@ -17003,13 +17003,13 @@ (define_insn "vec_dupv4sf" (set_attr "type" "sseshuf1,ssemov,sseshuf1") (set_attr "length_immediate" "1,0,1") (set_attr "prefix_extra" "0,1,*") - (set_attr "prefix" "vex,vex,orig") + (set_attr "prefix" "maybe_evex,maybe_evex,orig") (set_attr "mode" "V4SF")]) (define_insn "*vec_dupv4si" - [(set (match_operand:V4SI 0 
"register_operand" "=x,x,x") + [(set (match_operand:V4SI 0 "register_operand" "=v,v,x") (vec_duplicate:V4SI - (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))] + (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))] "TARGET_SSE" "@ %vpshufd\t{$0, %1, %0|%0, %1, 0} @@ -17019,13 +17019,13 @@ (define_insn "*vec_dupv4si" (set_attr "type" "sselog1,ssemov,sselog1") (set_attr "length_immediate" "1,0,1") (set_attr "prefix_extra" "0,1,*") - (set_attr "prefix" "maybe_vex,vex,orig") + (set_attr "prefix" "maybe_vex,maybe_evex,orig") (set_attr "mode" "TI,V4SF,V4SF")]) (define_insn "*vec_dupv2di" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x") + [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x") (vec_duplicate:V2DI - (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))] + (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,m,0")))] "TARGET_SSE" "@ punpcklqdq\t%0, %0 @@ -17034,7 +17034,7 @@ (define_insn "*vec_dupv2di" movlhps\t%0, %0" [(set_attr "isa" "sse2_noavx,avx,sse3,noavx") (set_attr "type" "sselog1,sselog1,sselog1,ssemov") - (set_attr "prefix" "orig,vex,maybe_vex,orig") + (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig") (set_attr "mode" "TI,TI,DF,V4SF")]) (define_insn "avx2_vbroadcasti128_<mode>" --- gcc/testsuite/gcc.target/i386/avx512vl-vbroadcast-1.c.jj 2016-05-18 12:31:29.486693255 +0200 +++ gcc/testsuite/gcc.target/i386/avx512vl-vbroadcast-1.c 2016-05-18 12:33:41.202891888 +0200 @@ -0,0 +1,41 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512vl" } */ + +#include <x86intrin.h> + +void +f1 (__m128d x) +{ + register __m128d a __asm ("xmm16"); + register __m256d b __asm ("xmm17"); + a = x; + asm volatile ("" : "+v" (a)); + b = _mm256_broadcastsd_pd (a); + asm volatile ("" : "+v" (b)); +} + +/* { dg-final { scan-assembler "vbroadcastsd\[^\n\r]*(xmm16\[^\n\r]*ymm17|ymm17\[^\n\r]*xmm16)" } } */ + +void +f2 (float const *x) +{ + register __m128 a __asm ("xmm16"); + a = _mm_broadcast_ss (x); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vbroadcastss\[^\n\r]*(\\)\[^\n\r]*xmm16|xmm16\[^\n\r]*PTR)" } } */ + +void +f3 (float x) +{ + register float a __asm ("xmm16"); + register __m128 b __asm ("xmm17"); + a = x; + asm volatile ("" : "+v" (a)); + float c = a; + b = _mm_broadcast_ss (&c); + asm volatile ("" : "+v" (b)); +} + +/* { dg-final { scan-assembler "vbroadcastss\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" } } */ Jakub