llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: woruyu (woruyu) <details> <summary>Changes</summary> ### Summary This PR resolves https://github.com/llvm/llvm-project/issues/154283 --- Patch is 29.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156003.diff 12 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.td (+16-12) - (modified) clang/lib/AST/ExprConstant.cpp (+99) - (modified) clang/lib/Headers/avx2intrin.h (+8-12) - (modified) clang/lib/Headers/avx512bwintrin.h (+8-12) - (modified) clang/lib/Headers/emmintrin.h (+6-6) - (modified) clang/lib/Headers/mmintrin.h (+12-15) - (modified) clang/lib/Headers/smmintrin.h (+2-2) - (modified) clang/test/CodeGen/X86/avx2-builtins.c (+4) - (modified) clang/test/CodeGen/X86/avx512bw-builtins.c (+4) - (modified) clang/test/CodeGen/X86/mmx-builtins.c (+3) - (modified) clang/test/CodeGen/X86/sse2-builtins.c (+3) - (modified) clang/test/CodeGen/X86/sse41-builtins.c (+1) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 527acd9ef086e..737be0f673e96 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -95,9 +95,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { let Features = "sse2" in { def pavgb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; def pavgw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; - def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">; - def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; - def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">; def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, _Constant int)">; def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">; def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">; @@ -108,6 +105,9 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def pmulhw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def pmulhuw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">; + def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">; + def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; + def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">; } let Features = "sse3" in { @@ -314,7 +314,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] def blendps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">; def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">; - def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">; def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">; @@ -333,6 +332,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">; + def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; } let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { @@ -568,10 +568,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; - def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">; - def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">; - def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">; - def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">; def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; def pavgb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; def pavgw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; @@ -636,6 +632,11 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def psrlv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; def psllv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; + + def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">; + def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">; + def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">; + def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">; } let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { @@ -1355,15 +1356,18 @@ let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWi let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">; - def packssdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">; - def packsswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">; - def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">; - def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">; def pavgb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">; def pavgw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">; } +let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def packsswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">; + def packssdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">; + def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">; + def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">; +} + let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def vpconflictdi_128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>)">; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a71cb8b0143be..8f649eca776ec 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11599,6 +11599,89 @@ static bool handleVectorElementCast(EvalInfo &Info, const FPOptions FPO, return false; } +enum class PackKind { + SSWB, + USWB, + SSDW, + USDW +}; // 16→8 signed/unsigned; 32→16 signed/unsigned + +static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result, + PackKind K) { + APValue L, R; + if (!EvaluateAsRValue(Info, E->getArg(0), L) || + !EvaluateAsRValue(Info, E->getArg(1), R)) + return false; + + unsigned SrcBits = (K == PackKind::SSWB || K == PackKind::USWB) ? 16 : 32; + unsigned DstBits = SrcBits / 2; + + unsigned NL = L.getVectorLength(); + unsigned NR = R.getVectorLength(); + if (NL == 0 || NR == 0 || NL != NR) + return false; + + // Bounds for saturation (extended to SrcBits for compares). + APInt Lo = (K == PackKind::USWB || K == PackKind::USDW) + ? APInt(SrcBits, 0) + : APInt::getSignedMinValue(DstBits).sext(SrcBits); + APInt Hi = (K == PackKind::USWB || K == PackKind::USDW) + ? APInt::getMaxValue(DstBits).zext(SrcBits) + : APInt::getSignedMaxValue(DstBits).sext(SrcBits); + + // Result element signedness follows the builtin's return vector element type. + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestIsUnsigned = DestEltTy->isUnsignedIntegerType(); + + // Clamp one source element to the target range and narrow to DstBits. + auto clampOne = [&](const APSInt &X) -> APSInt { + APInt V = X; + if (V.getBitWidth() != SrcBits) + V = V.sextOrTrunc(SrcBits); + + if (K == PackKind::USWB || K == PackKind::USDW) { + if (V.isNegative()) + V = Lo; + else if (V.ugt(Hi)) + V = Hi; + APInt Narrow = V.zextOrTrunc(DstBits); + return APSInt(Narrow, /*isUnsigned=*/true); + } else { + if (V.sgt(Hi)) + V = Hi; + else if (V.slt(Lo)) + V = Lo; + APInt Narrow = V.sextOrTrunc(DstBits); + return APSInt(Narrow, /*isUnsigned=*/DestIsUnsigned); + } + }; + + SmallVector<APValue, 64> Out; + Out.reserve(NL + NR); + + // Process per 128-bit lane (MMX 64-bit uses a single lane). + unsigned VectorBits = NL * SrcBits; + unsigned srcPerLane, lanes; + if (VectorBits >= 128) { + srcPerLane = 128 / SrcBits; // 8 (16→8) or 4 (32→16) + lanes = VectorBits / 128; // 1 (128b), 2 (256b), 4 (512b) + } else { + srcPerLane = NL; // MMX + lanes = 1; + } + + for (unsigned lane = 0; lane != lanes; ++lane) { + unsigned base = lane * srcPerLane; + for (unsigned i = 0; i != srcPerLane; ++i) + Out.push_back(APValue(clampOne(L.getVectorElt(base + i).getInt()))); + for (unsigned i = 0; i != srcPerLane; ++i) + Out.push_back(APValue(clampOne(R.getVectorElt(base + i).getInt()))); + } + + Result = APValue(Out.data(), Out.size()); + return true; +} + bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!IsConstantEvaluatedBuiltinCall(E)) return ExprEvaluatorBaseTy::VisitCallExpr(E); @@ -11752,6 +11835,22 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_packsswb128: + case X86::BI__builtin_ia32_packsswb256: + case X86::BI__builtin_ia32_packsswb512: + return evalPackBuiltin(E, Info, Result, PackKind::SSWB); + case X86::BI__builtin_ia32_packuswb128: + case X86::BI__builtin_ia32_packuswb256: + case X86::BI__builtin_ia32_packuswb512: + return evalPackBuiltin(E, Info, Result, PackKind::USWB); + case X86::BI__builtin_ia32_packssdw128: + case X86::BI__builtin_ia32_packssdw256: + case X86::BI__builtin_ia32_packssdw512: + return evalPackBuiltin(E, Info, Result, PackKind::SSDW); + case X86::BI__builtin_ia32_packusdw128: + case X86::BI__builtin_ia32_packusdw256: + case X86::BI__builtin_ia32_packusdw512: + return evalPackBuiltin(E, Info, Result, PackKind::USDW); case clang::X86::BI__builtin_ia32_pmuldq128: case clang::X86::BI__builtin_ia32_pmuldq256: case clang::X86::BI__builtin_ia32_pmuldq512: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index ce5b2b7544d8c..22d7157f0db58 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -177,9 +177,8 @@ _mm256_abs_epi32(__m256i __a) /// A 256-bit vector of [16 x i16] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_packs_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_packs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b); } @@ -209,9 +208,8 @@ _mm256_packs_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_packs_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_packs_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b); } @@ -240,9 +238,8 @@ _mm256_packs_epi32(__m256i __a, __m256i __b) /// A 256-bit vector of [16 x i16] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_packus_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_packus_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b); } @@ -272,9 +269,8 @@ _mm256_packus_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_packus_epi32(__m256i __V1, __m256i __V2) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_packus_epi32(__m256i __V1, __m256i __V2) { return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2); } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 9263f7af3ee2f..b56f53107fa92 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -525,9 +525,8 @@ _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_packs_epi32(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_packs_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B); } @@ -547,9 +546,8 @@ _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_packs_epi16(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_packs_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B); } @@ -569,9 +567,8 @@ _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_packus_epi32(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_packus_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B); } @@ -591,9 +588,8 @@ _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_packus_epi16(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_packus_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B); } diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 8b6b62458dac1..f3c9eb7158c1f 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -4166,8 +4166,8 @@ void _mm_mfence(void); /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are /// written to the higher 64 bits of the result. /// \returns A 128-bit vector of [16 x i8] containing the converted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_packs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); } @@ -4189,8 +4189,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, /// A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values /// are written to the higher 64 bits of the result. /// \returns A 128-bit vector of [8 x i16] containing the converted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_packs_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); } @@ -4212,8 +4212,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are /// written to the higher 64 bits of the result. /// \returns A 128-bit vector of [16 x i8] containing the converted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_packus_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); } diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h index 6fe9d67b8976d..aa6845f60ee99 100644 --- a/clang/lib/Headers/mmintrin.h +++ b/clang/lib/Headers/mmintrin.h @@ -162,11 +162,10 @@ _mm_cvtm64_si64(__m64 __m) /// written to the upper 32 bits of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the converted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_packs_pi16(__m64 __m1, __m64 __m2) -{ - return __trunc64(__builtin_ia32_packsswb128( - (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_packs_pi16(__m64 __m1, __m64 __m2) { + return __trunc64(__builtin_ia32_packsswb128( + (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){})); } /// Converts, with saturation, 32-bit signed integers from both 64-bit integer @@ -188,11 +187,10 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2) /// written to the upper 32 bits of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the converted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_packs_pi32(__m64 __m1, __m64 __m2) -{ - return __trunc64(__builtin_ia32_packssdw128( - (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_packs_pi32(__m64 __m1, __m64 __m2) { + return __trunc64(__builtin_ia32_packssdw128( + (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){})); } /// Converts, with saturation, 16-bit signed integers from both 64-bit integer @@ -214,11 +212,10 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2) /// written to the upper 32 bits of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the converted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_packs_pu16(__m64 __m1, __m64 __m2) -{ - return __trunc64(__builtin_ia32_packuswb128( - (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_packs_pu16(__m64 __m1, __m64 __m2) { + return __trunc64(__builtin_ia32_packuswb128( + (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){})); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 57d0d329312af..8acf4929d1125 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -1475,8 +1475,8 @@ _mm_cvtepu32_epi64(__m128i __V) { /// A 128-bit vector of [4 x i32]. The converted [4 x i16] values are /// written to the higher 64 bits of the result. /// \returns A 128-bit vector of [8 x i16] containing the converted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_packus_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2); } diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 29cb3e8860be9..d2d8b53c24a96 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/156003 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits