Hi!
These insns are either AVX512VL or AVX512VL & BW; this patch allows using
XMM16+ where possible.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2016-05-13 Jakub Jelinek
* config/i386/sse.md (pbroadcast_evex_isa): New mode attr.
(avx2_pbroadcast<mode>): Add another alternative with v instead
of x constraints in it, using <pbroadcast_evex_isa> isa.
(avx2_pbroadcast<mode>_1): Similarly, add two such alternatives.
* gcc.target/i386/avx512bw-vpbroadcast-1.c: New test.
* gcc.target/i386/avx512bw-vpbroadcast-2.c: New test.
* gcc.target/i386/avx512bw-vpbroadcast-3.c: New test.
* gcc.target/i386/avx512vl-vpbroadcast-1.c: New test.
* gcc.target/i386/avx512vl-vpbroadcast-2.c: New test.
* gcc.target/i386/avx512vl-vpbroadcast-3.c: New test.
--- gcc/config/i386/sse.md.jj 2016-05-13 16:12:24.631965207 +0200
+++ gcc/config/i386/sse.md 2016-05-13 17:33:32.429909899 +0200
@@ -16725,30 +16725,40 @@ (define_insn "avx_vzeroupper"
(set_attr "btver2_decode" "vector")
(set_attr "mode" "OI")])
+(define_mode_attr pbroadcast_evex_isa
+ [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
+ (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
+ (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
+ (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
+
(define_insn "avx2_pbroadcast"
- [(set (match_operand:VI 0 "register_operand" "=x")
+ [(set (match_operand:VI 0 "register_operand" "=x,v")
(vec_duplicate:VI
(vec_select:
- (match_operand: 1 "nonimmediate_operand" "xm")
+ (match_operand: 1 "nonimmediate_operand" "xm,vm")
(parallel [(const_int 0)]))))]
"TARGET_AVX2"
"vpbroadcast\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
+ [(set_attr "isa" "*,<pbroadcast_evex_isa>")
+ (set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "vex,evex")
(set_attr "mode" "")])
(define_insn "avx2_pbroadcast_1"
- [(set (match_operand:VI_256 0 "register_operand" "=x,x")
+ [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
(vec_duplicate:VI_256
(vec_select:
- (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
+ (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
(parallel [(const_int 0)]))))]
"TARGET_AVX2"
"@
vpbroadcast\t{%1, %0|%0, %1}
+ vpbroadcast\t{%x1, %0|%0, %x1}
+ vpbroadcast\t{%1, %0|%0, %1}
vpbroadcast\t{%x1, %0|%0, %x1}"
- [(set_attr "type" "ssemov")
+ [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
+ (set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
--- gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-1.c.jj 2016-05-13 16:58:07.491988435 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-1.c 2016-05-13 17:31:29.830534782 +0200
@@ -0,0 +1,104 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw" } */
+
+#include <x86intrin.h>
+
+void
+f1 (__m128i x)
+{
+ register __m128i a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = _mm_broadcastb_epi8 (a);
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */
+
+void
+f2 (__m128i x)
+{
+ register __m128i a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = _mm_broadcastw_epi16 (a);
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */
+
+void
+f3 (__m128i x)
+{
+ register __m128i a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = _mm_broadcastd_epi32 (a);
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */
+
+void
+f4 (__m128i x)
+{
+ register __m128i a __asm ("xmm16");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ a = _mm_broadcastq_epi64 (a);
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastq\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */
+
+void
+f5 (__m128i x)
+{
+ register __m128i a __asm ("xmm16");
+ register __m256i b __asm ("xmm17");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ b = _mm256_broadcastb_epi8 (a);
+ asm volatile ("" : "+v" (b));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */
+
+void
+f6 (__m128i x)
+{
+ register __m128i a __asm ("xmm16");
+ register __m256i b __asm ("xmm17");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ b = _mm256_broadcastw_epi16 (a);
+ asm volatile ("" : "+v" (b));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */
+
+void
+f7 (__m128i x)
+{
+ register __m128i a __asm ("xmm16");
+ register __m256i b __asm ("xmm17");
+ a = x;
+ asm volatile ("" : "+v" (a));
+ b = _mm256_broadcastd_epi32 (a);
+ asm volatile ("" : "+v" (b));
+}