Author: ctopper Date: Wed Aug 30 09:15:12 2017 New Revision: 312135 URL: http://llvm.org/viewvc/llvm-project?rev=312135&view=rev Log: [X86] Implement broadcastf32x2 and broadcasti32x2 intrinsics using __builtin_shufflevector instead builtins
This patch implements the broadcastf32x2/broadcasti32x2 intrinsics using __builtin_shufflevector. Differential Revision: https://reviews.llvm.org/D37287 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/lib/Headers/avx512vldqintrin.h cfe/trunk/test/CodeGen/avx512dq-builtins.c cfe/trunk/test/CodeGen/avx512vldq-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=312135&r1=312134&r2=312135&view=diff ============================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Aug 30 09:15:12 2017 @@ -1596,11 +1596,6 @@ TARGET_BUILTIN(__builtin_ia32_broadcastm TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastmw256, "V8iUs","","avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_512_mask, "V16fV4fV16fUs","","avx512dq") -TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_512_mask, "V16iV4iV16iUs","","avx512dq") -TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_256_mask, "V8fV4fV8fUc","","avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_128_mask, "V4iV4iV4iUc","","avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_256_mask, "V8iV4iV8iUc","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_gpr_mask, "V32shV32sUi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, "V16shV16sUs","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl") Modified: cfe/trunk/lib/Headers/avx512dqintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=312135&r1=312134&r2=312135&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avx512dqintrin.h (original) +++ cfe/trunk/lib/Headers/avx512dqintrin.h Wed Aug 30 09:15:12 2017 @@ -973,25 +973,26 @@ _mm512_movepi64_mask (__m512i __A) static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_broadcast_f32x2 (__m128 __A) { - return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, - (__v16sf)_mm512_undefined_ps(), - (__mmask16) -1); + return (__m512)__builtin_shufflevector((__v4sf)__A, + (__v4sf)_mm_undefined_ps(), + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) { - return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, - (__v16sf) - __O, __M); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, + (__v16sf)_mm512_broadcast_f32x2(__A), + (__v16sf)__O); } static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) { - return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, - (__v16sf)_mm512_setzero_ps (), - __M); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, + (__v16sf)_mm512_broadcast_f32x2(__A), + (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512 __DEFAULT_FN_ATTRS @@ -1044,25 +1045,26 @@ _mm512_maskz_broadcast_f64x2(__mmask8 __ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcast_i32x2 (__m128i __A) { - return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, - (__v16si)_mm512_setzero_si512(), - (__mmask16) -1); + return (__m512i)__builtin_shufflevector((__v4si)__A, + (__v4si)_mm_undefined_si128(), + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) { - return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, - (__v16si) - __O, __M); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, + (__v16si)_mm512_broadcast_i32x2(__A), + (__v16si)__O); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) { - return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, - (__v16si)_mm512_setzero_si512 (), - __M); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, + (__v16si)_mm512_broadcast_i32x2(__A), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Headers/avx512vldqintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vldqintrin.h?rev=312135&r1=312134&r2=312135&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avx512vldqintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vldqintrin.h Wed Aug 30 09:15:12 2017 @@ -978,25 +978,25 @@ _mm256_movepi64_mask (__m256i __A) static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_f32x2 (__m128 __A) { - return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, - (__v8sf)_mm256_undefined_ps(), - (__mmask8) -1); + return (__m256)__builtin_shufflevector((__v4sf)__A, + (__v4sf)_mm_undefined_ps(), + 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) { - return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, - (__v8sf) __O, - __M); + return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, + (__v8sf)_mm256_broadcast_f32x2(__A), + (__v8sf)__O); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) { - return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, - (__v8sf) _mm256_setzero_ps (), - __M); + return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, + (__v8sf)_mm256_broadcast_f32x2(__A), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256d __DEFAULT_FN_ATTRS @@ -1025,49 +1025,49 @@ _mm256_maskz_broadcast_f64x2 (__mmask8 _ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcast_i32x2 (__m128i __A) { - return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, - (__v4si)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector((__v4si)__A, + (__v4si)_mm_undefined_si128(), + 0, 1, 0, 1); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) { - return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, - (__v4si) __O, - __M); + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, + (__v4si)_mm_broadcast_i32x2(__A), + (__v4si)__O); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { - return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, - (__v4si) _mm_setzero_si128 (), - __M); + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, + (__v4si)_mm_broadcast_i32x2(__A), + (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcast_i32x2 (__m128i __A) { - return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, - (__v8si)_mm256_undefined_si256(), - (__mmask8) -1); + return (__m256i)__builtin_shufflevector((__v4si)__A, + (__v4si)_mm_undefined_si128(), + 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) { - return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, - (__v8si) __O, - __M); + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, + (__v8si)_mm256_broadcast_i32x2(__A), + (__v8si)__O); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { - return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, - (__v8si) _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, + (__v8si)_mm256_broadcast_i32x2(__A), + (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=312135&r1=312134&r2=312135&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Wed Aug 30 09:15:12 2017 @@ -949,19 +949,21 @@ __mmask8 test_mm512_movepi64_mask(__m512 __m512 test_mm512_broadcast_f32x2(__m128 __A) { // CHECK-LABEL: @test_mm512_broadcast_f32x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> return _mm512_broadcast_f32x2(__A); } __m512 test_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) { // CHECK-LABEL: @test_mm512_mask_broadcast_f32x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_broadcast_f32x2(__O, __M, __A); } __m512 test_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) { // CHECK-LABEL: @test_mm512_maskz_broadcast_f32x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_broadcast_f32x2(__M, __A); } @@ -1007,19 +1009,21 @@ __m512d test_mm512_maskz_broadcast_f64x2 __m512i test_mm512_broadcast_i32x2(__m128i __A) { // CHECK-LABEL: @test_mm512_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> return _mm512_broadcast_i32x2(__A); } __m512i test_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) { // CHECK-LABEL: @test_mm512_mask_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_broadcast_i32x2(__O, __M, __A); } __m512i test_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) { // CHECK-LABEL: @test_mm512_maskz_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_broadcast_i32x2(__M, __A); } Modified: cfe/trunk/test/CodeGen/avx512vldq-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vldq-builtins.c?rev=312135&r1=312134&r2=312135&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/avx512vldq-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512vldq-builtins.c Wed Aug 30 09:15:12 2017 @@ -909,19 +909,21 @@ __mmask8 test_mm256_movepi64_mask(__m256 __m256 test_mm256_broadcast_f32x2(__m128 __A) { // CHECK-LABEL: @test_mm256_broadcast_f32x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> return _mm256_broadcast_f32x2(__A); } __m256 test_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) { // CHECK-LABEL: @test_mm256_mask_broadcast_f32x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_broadcast_f32x2(__O, __M, __A); } __m256 test_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) { // CHECK-LABEL: @test_mm256_maskz_broadcast_f32x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_broadcast_f32x2(__M, __A); } @@ -947,37 +949,41 @@ __m256d test_mm256_maskz_broadcast_f64x2 __m128i test_mm_broadcast_i32x2(__m128i __A) { // CHECK-LABEL: @test_mm_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1> return _mm_broadcast_i32x2(__A); } __m128i test_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm_mask_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_broadcast_i32x2(__O, __M, __A); } __m128i test_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_broadcast_i32x2(__M, __A); } __m256i test_mm256_broadcast_i32x2(__m128i __A) { // CHECK-LABEL: @test_mm256_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> return _mm256_broadcast_i32x2(__A); } __m256i test_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm256_mask_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_broadcast_i32x2(__O, __M, __A); } __m256i test_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm256_maskz_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_broadcast_i32x2(__M, __A); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits