Author: ctopper Date: Thu Jun 7 17:00:21 2018 New Revision: 334249 URL: http://llvm.org/viewvc/llvm-project?rev=334249&view=rev Log: [X86] Add builtins for blend with immediate control to enforce target feature requirements and check immediate range.
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx2intrin.h cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/smmintrin.h cfe/trunk/lib/Sema/SemaChecking.cpp cfe/trunk/test/CodeGen/avx-builtins.c cfe/trunk/test/CodeGen/avx2-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=334249&r1=334248&r2=334249&view=diff ============================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu Jun 7 17:00:21 2018 @@ -369,6 +369,9 @@ TARGET_BUILTIN(__builtin_ia32_palignr128 TARGET_BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fIc", "nc", "sse4.1") TARGET_BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "nc", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pblendw128, "V8sV8sV8sIi", "nc", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_blendpd, "V2dV2dV2dIi", "nc", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_blendps, "V4fV4fV4fIi", "nc", "sse4.1") TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "nc", "sse4.1") TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "nc", "sse4.1") TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "nc", "sse4.1") @@ -477,6 +480,8 @@ TARGET_BUILTIN(__builtin_ia32_vpermilvar TARGET_BUILTIN(__builtin_ia32_vpermilvarps, "V4fV4fV4i", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4LLi", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_vpermilvarps256, "V8fV8fV8i", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_blendpd256, "V4dV4dV4dIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fIi", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "nc", "avx") @@ -554,6 
+559,7 @@ TARGET_BUILTIN(__builtin_ia32_psubusb256 TARGET_BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "nc", "avx2") +TARGET_BUILTIN(__builtin_ia32_pblendw256, "V16sV16sV16sIi", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_phaddsw256, "V16sV16sV16s", "nc", "avx2") @@ -603,6 +609,8 @@ TARGET_BUILTIN(__builtin_ia32_psrldi256, TARGET_BUILTIN(__builtin_ia32_psrld256, "V8iV8iV4i", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4LLiV4LLii", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "nc", "avx2") +TARGET_BUILTIN(__builtin_ia32_pblendd128, "V4iV4iV4iIi", "nc", "avx2") +TARGET_BUILTIN(__builtin_ia32_pblendd256, "V8iV8iV8iIi", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "nc", "avx2") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=334249&r1=334248&r2=334249&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Jun 7 17:00:21 2018 @@ -9235,6 +9235,27 @@ Value *CodeGenFunction::EmitX86BuiltinEx Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } + case X86::BI__builtin_ia32_pblendw128: + case X86::BI__builtin_ia32_blendpd: + case X86::BI__builtin_ia32_blendps: + case X86::BI__builtin_ia32_blendpd256: + case X86::BI__builtin_ia32_blendps256: + case X86::BI__builtin_ia32_pblendw256: + case 
X86::BI__builtin_ia32_pblendd128: + case X86::BI__builtin_ia32_pblendd256: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + + uint32_t Indices[16]; + // Bit (i % 8) of Imm set selects element i of Ops[1] (index NumElts + i), + // else element i of Ops[0]; past 8 elements the immediate is used twice. + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i; + + return Builder.CreateShuffleVector(Ops[0], Ops[1], + makeArrayRef(Indices, NumElts), + "blend"); + } case X86::BI__builtin_ia32_palignr128: case X86::BI__builtin_ia32_palignr256: case X86::BI__builtin_ia32_palignr512: { Modified: cfe/trunk/lib/Headers/avx2intrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx2intrin.h?rev=334249&r1=334248&r2=334249&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avx2intrin.h (original) +++ cfe/trunk/lib/Headers/avx2intrin.h Thu Jun 7 17:00:21 2018 @@ -170,24 +170,8 @@ _mm256_blendv_epi8(__m256i __V1, __m256i } #define _mm256_blend_epi16(V1, V2, M) \ - (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(V1), \ - (__v16hi)(__m256i)(V2), \ - (((M) & 0x01) ? 16 : 0), \ - (((M) & 0x02) ? 17 : 1), \ - (((M) & 0x04) ? 18 : 2), \ - (((M) & 0x08) ? 19 : 3), \ - (((M) & 0x10) ? 20 : 4), \ - (((M) & 0x20) ? 21 : 5), \ - (((M) & 0x40) ? 22 : 6), \ - (((M) & 0x80) ? 23 : 7), \ - (((M) & 0x01) ? 24 : 8), \ - (((M) & 0x02) ? 25 : 9), \ - (((M) & 0x04) ? 26 : 10), \ - (((M) & 0x08) ? 27 : 11), \ - (((M) & 0x10) ? 28 : 12), \ - (((M) & 0x20) ? 29 : 13), \ - (((M) & 0x40) ? 30 : 14), \ - (((M) & 0x80) ?
31 : 15)) + (__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \ + (__v16hi)(__m256i)(V2), (int)(M)) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cmpeq_epi8(__m256i __a, __m256i __b) @@ -809,24 +793,12 @@ _mm256_broadcastsi128_si256(__m128i __X) } #define _mm_blend_epi32(V1, V2, M) \ - (__m128i)__builtin_shufflevector((__v4si)(__m128i)(V1), \ - (__v4si)(__m128i)(V2), \ - (((M) & 0x01) ? 4 : 0), \ - (((M) & 0x02) ? 5 : 1), \ - (((M) & 0x04) ? 6 : 2), \ - (((M) & 0x08) ? 7 : 3)) + (__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \ + (__v4si)(__m128i)(V2), (int)(M)) #define _mm256_blend_epi32(V1, V2, M) \ - (__m256i)__builtin_shufflevector((__v8si)(__m256i)(V1), \ - (__v8si)(__m256i)(V2), \ - (((M) & 0x01) ? 8 : 0), \ - (((M) & 0x02) ? 9 : 1), \ - (((M) & 0x04) ? 10 : 2), \ - (((M) & 0x08) ? 11 : 3), \ - (((M) & 0x10) ? 12 : 4), \ - (((M) & 0x20) ? 13 : 5), \ - (((M) & 0x40) ? 14 : 6), \ - (((M) & 0x80) ? 15 : 7)) + (__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \ + (__v8si)(__m256i)(V2), (int)(M)) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastb_epi8(__m128i __X) Modified: cfe/trunk/lib/Headers/avxintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=334249&r1=334248&r2=334249&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avxintrin.h (original) +++ cfe/trunk/lib/Headers/avxintrin.h Thu Jun 7 17:00:21 2018 @@ -1355,12 +1355,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i /// operand \a V2 is copied to the same position in the destination. /// \returns A 256-bit vector of [4 x double] containing the copied values. #define _mm256_blend_pd(V1, V2, M) \ - (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \ - (__v4df)(__m256d)(V2), \ - (((M) & 0x01) ? 4 : 0), \ - (((M) & 0x02) ? 5 : 1), \ - (((M) & 0x04) ? 6 : 2), \ - (((M) & 0x08) ? 
7 : 3)) + (__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \ + (__v4df)(__m256d)(V2), (int)(M)) /// Merges 32-bit single-precision data values stored in either of the /// two 256-bit vectors of [8 x float], as specified by the immediate @@ -1387,16 +1383,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i /// operand \a V2 is copied to the same position in the destination. /// \returns A 256-bit vector of [8 x float] containing the copied values. #define _mm256_blend_ps(V1, V2, M) \ - (__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \ - (__v8sf)(__m256)(V2), \ - (((M) & 0x01) ? 8 : 0), \ - (((M) & 0x02) ? 9 : 1), \ - (((M) & 0x04) ? 10 : 2), \ - (((M) & 0x08) ? 11 : 3), \ - (((M) & 0x10) ? 12 : 4), \ - (((M) & 0x20) ? 13 : 5), \ - (((M) & 0x40) ? 14 : 6), \ - (((M) & 0x80) ? 15 : 7)) + (__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \ + (__v8sf)(__m256)(V2), (int)(M)) /// Merges 64-bit double-precision data values stored in either of the /// two 256-bit vectors of [4 x double], as specified by the 256-bit vector Modified: cfe/trunk/lib/Headers/smmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/smmintrin.h?rev=334249&r1=334248&r2=334249&view=diff ============================================================================== --- cfe/trunk/lib/Headers/smmintrin.h (original) +++ cfe/trunk/lib/Headers/smmintrin.h Thu Jun 7 17:00:21 2018 @@ -390,10 +390,8 @@ /// is copied to the same position in the result. /// \returns A 128-bit vector of [2 x double] containing the copied values. #define _mm_blend_pd(V1, V2, M) \ - (__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \ - (__v2df)(__m128d)(V2), \ - (((M) & 0x01) ? 2 : 0), \ - (((M) & 0x02) ? 
3 : 1)) + (__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \ + (__v2df)(__m128d)(V2), (int)(M)) /// Returns a 128-bit vector of [4 x float] where the values are selected /// from either the first or second operand as specified by the third @@ -420,11 +418,8 @@ /// is copied to the same position in the result. /// \returns A 128-bit vector of [4 x float] containing the copied values. #define _mm_blend_ps(V1, V2, M) \ - (__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \ - (((M) & 0x01) ? 4 : 0), \ - (((M) & 0x02) ? 5 : 1), \ - (((M) & 0x04) ? 6 : 2), \ - (((M) & 0x08) ? 7 : 3)) + (__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \ + (__v4sf)(__m128)(V2), (int)(M)) /// Returns a 128-bit vector of [2 x double] where the values are /// selected from either the first or second operand as specified by the @@ -532,16 +527,8 @@ _mm_blendv_epi8 (__m128i __V1, __m128i _ /// is copied to the same position in the result. /// \returns A 128-bit vector of [8 x i16] containing the copied values. #define _mm_blend_epi16(V1, V2, M) \ - (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \ - (__v8hi)(__m128i)(V2), \ - (((M) & 0x01) ? 8 : 0), \ - (((M) & 0x02) ? 9 : 1), \ - (((M) & 0x04) ? 10 : 2), \ - (((M) & 0x08) ? 11 : 3), \ - (((M) & 0x10) ? 12 : 4), \ - (((M) & 0x20) ? 13 : 5), \ - (((M) & 0x40) ? 14 : 6), \ - (((M) & 0x80) ? 15 : 7)) + (__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \ + (__v8hi)(__m128i)(V2), (int)(M)) /* SSE4 Dword Multiply Instructions. 
*/ /// Multiples corresponding elements of two 128-bit vectors of [4 x i32] Modified: cfe/trunk/lib/Sema/SemaChecking.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=334249&r1=334248&r2=334249&view=diff ============================================================================== --- cfe/trunk/lib/Sema/SemaChecking.cpp (original) +++ cfe/trunk/lib/Sema/SemaChecking.cpp Thu Jun 7 17:00:21 2018 @@ -2624,6 +2624,7 @@ bool Sema::CheckX86BuiltinFunctionCall(u i = 1; l = 0; u = 7; break; case X86::BI__builtin_ia32_sha1rnds4: + case X86::BI__builtin_ia32_blendpd: case X86::BI__builtin_ia32_vec_set_v4hi: case X86::BI__builtin_ia32_vec_set_v4si: case X86::BI__builtin_ia32_vec_set_v4di: @@ -2683,6 +2684,9 @@ bool Sema::CheckX86BuiltinFunctionCall(u case X86::BI__builtin_ia32_vec_ext_v16hi: i = 1; l = 0; u = 15; break; + case X86::BI__builtin_ia32_pblendd128: + case X86::BI__builtin_ia32_blendps: + case X86::BI__builtin_ia32_blendpd256: case X86::BI__builtin_ia32_roundss: case X86::BI__builtin_ia32_roundsd: case X86::BI__builtin_ia32_rangepd128_mask: @@ -2754,6 +2758,10 @@ bool Sema::CheckX86BuiltinFunctionCall(u case X86::BI__builtin_ia32_fpclassss_mask: i = 1; l = 0; u = 255; break; + case X86::BI__builtin_ia32_pblendw128: + case X86::BI__builtin_ia32_pblendw256: + case X86::BI__builtin_ia32_blendps256: + case X86::BI__builtin_ia32_pblendd256: case X86::BI__builtin_ia32_palignr128: case X86::BI__builtin_ia32_palignr256: case X86::BI__builtin_ia32_palignr512: Modified: cfe/trunk/test/CodeGen/avx-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=334249&r1=334248&r2=334249&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/avx-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx-builtins.c Thu Jun 7 17:00:21 2018 @@ -59,7 +59,7 @@ __m256 test_mm256_andnot_ps(__m256 A, __ __m256d test_mm256_blend_pd(__m256d A, __m256d B) { // 
CHECK-LABEL: test_mm256_blend_pd // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3> - return _mm256_blend_pd(A, B, 0x35); + return _mm256_blend_pd(A, B, 0x05); } __m256 test_mm256_blend_ps(__m256 A, __m256 B) { Modified: cfe/trunk/test/CodeGen/avx2-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=334249&r1=334248&r2=334249&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/avx2-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx2-builtins.c Thu Jun 7 17:00:21 2018 @@ -141,7 +141,7 @@ __m128i test_mm_blend_epi32(__m128i a, _ // CHECK-LABEL: test_mm_blend_epi32 // CHECK-NOT: @llvm.x86.avx2.pblendd.128 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3> - return _mm_blend_epi32(a, b, 0x35); + return _mm_blend_epi32(a, b, 0x05); } __m256i test_mm256_blend_epi32(__m256i a, __m256i b) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits