llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Chaitanya Koparkar (ckoparkar)

<details>
<summary>Changes</summary>

Fixes #155265

Add constexpr support for the following AVX-512 FMA intrinsics (fmadd, fmsub, fnmadd, fnmsub and their masked variants):

_mm512_fmadd_pd _mm512_mask_fmadd_pd _mm512_mask3_fmadd_pd 
_mm512_maskz_fmadd_pd _mm512_fmadd_ps _mm512_mask_fmadd_ps 
_mm512_mask3_fmadd_ps _mm512_maskz_fmadd_ps _mm_mask_fmadd_pd 
_mm_mask3_fmadd_pd _mm_maskz_fmadd_pd _mm_mask_fmadd_ps _mm_mask3_fmadd_ps 
_mm_maskz_fmadd_ps _mm256_mask_fmadd_pd _mm256_mask3_fmadd_pd 
_mm256_maskz_fmadd_pd _mm256_mask_fmadd_ps _mm256_mask3_fmadd_ps 
_mm256_maskz_fmadd_ps

_mm512_fmsub_pd _mm512_mask_fmsub_pd _mm512_mask3_fmsub_pd 
_mm512_maskz_fmsub_pd _mm512_fmsub_ps _mm512_mask_fmsub_ps 
_mm512_mask3_fmsub_ps _mm512_maskz_fmsub_ps _mm_mask_fmsub_pd 
_mm_mask3_fmsub_pd _mm_maskz_fmsub_pd _mm_mask_fmsub_ps _mm_mask3_fmsub_ps 
_mm_maskz_fmsub_ps _mm256_mask_fmsub_pd _mm256_mask3_fmsub_pd 
_mm256_maskz_fmsub_pd _mm256_mask_fmsub_ps _mm256_mask3_fmsub_ps 
_mm256_maskz_fmsub_ps

_mm512_fnmadd_pd _mm512_mask_fnmadd_pd _mm512_mask3_fnmadd_pd 
_mm512_maskz_fnmadd_pd _mm512_fnmsub_pd _mm512_mask_fnmsub_pd 
_mm512_mask3_fnmsub_pd _mm512_maskz_fnmsub_pd _mm_mask_fnmadd_pd 
_mm_mask3_fnmadd_pd _mm_maskz_fnmadd_pd _mm_mask_fnmadd_ps _mm_mask3_fnmadd_ps 
_mm_maskz_fnmadd_ps _mm256_mask_fnmadd_pd _mm256_mask3_fnmadd_pd 
_mm256_maskz_fnmadd_pd _mm256_mask_fnmadd_ps _mm256_mask3_fnmadd_ps 
_mm256_maskz_fnmadd_ps

_mm512_fnmadd_ps _mm512_mask_fnmadd_ps _mm512_mask3_fnmadd_ps 
_mm512_maskz_fnmadd_ps _mm512_fnmsub_ps _mm512_mask_fnmsub_ps 
_mm512_mask3_fnmsub_ps _mm512_maskz_fnmsub_ps _mm_mask_fnmsub_pd 
_mm_mask3_fnmsub_pd _mm_maskz_fnmsub_pd _mm_mask_fnmsub_ps _mm_mask3_fnmsub_ps 
_mm_maskz_fnmsub_ps _mm256_mask_fnmsub_pd _mm256_mask3_fnmsub_pd 
_mm256_maskz_fnmsub_pd _mm256_mask_fnmsub_ps _mm256_mask3_fnmsub_ps 
_mm256_maskz_fnmsub_ps

---

Patch is 121.35 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/156385.diff


4 Files Affected:

- (modified) clang/lib/Headers/avx512fintrin.h (+168-224) 
- (modified) clang/lib/Headers/avx512vlintrin.h (+49-49) 
- (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+132-76) 
- (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+96) 


``````````diff
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e23b1c0381ab1..741ce26aaa043 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -2502,124 +2502,136 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
                                              (__mmask8)(U), (int)(R)))
 
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    (__v8df) __B,
-                                                    (__v8df) __C,
-                                                    (__mmask8) -1,
-                                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, (__v8df) __C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    (__v8df) __B,
-                                                    (__v8df) __C,
-                                                    (__mmask8) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8) __U,
+      __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, (__v8df) __C),
+      (__v8df) __A);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
-                                                     (__v8df) __B,
-                                                     (__v8df) __C,
-                                                     (__mmask8) __U,
-                                                     _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8) __U,
+      __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, (__v8df) __C),
+      (__v8df) __C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
-                                                     (__v8df) __B,
-                                                     (__v8df) __C,
-                                                     (__mmask8) __U,
-                                                     _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8) __U,
+      __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, (__v8df) __C),
+      (__v8df) _mm512_setzero_pd());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    (__v8df) __B,
-                                                    -(__v8df) __C,
-                                                    (__mmask8) -1,
-                                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, -(__v8df) __C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    (__v8df) __B,
-                                                    -(__v8df) __C,
-                                                    (__mmask8) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8) __U,
+      __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, -(__v8df) __C),
+      (__v8df) __A);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8) __U,
+      __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, -(__v8df) __C),
+      (__v8df) __C);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
-                                                     (__v8df) __B,
-                                                     -(__v8df) __C,
-                                                     (__mmask8) __U,
-                                                     _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8) __U,
+      __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, -(__v8df) __C),
+      (__v8df) _mm512_setzero_pd());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    -(__v8df) __B,
-                                                    (__v8df) __C,
-                                                    (__mmask8) -1,
-                                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, (__v8df) __C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8) __U,
+      __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, (__v8df) __C),
+      (__v8df) __A);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
-                                                     (__v8df) __B,
-                                                     (__v8df) __C,
-                                                     (__mmask8) __U,
-                                                     _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8) __U,
+      __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, (__v8df) __C),
+      (__v8df) __C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
-                                                     (__v8df) __B,
-                                                     (__v8df) __C,
-                                                     (__mmask8) __U,
-                                                     _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8) __U,
+      __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, (__v8df) __C),
+      (__v8df) _mm512_setzero_pd());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    -(__v8df) __B,
-                                                    -(__v8df) __C,
-                                                    (__mmask8) -1,
-                                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, -(__v8df) __C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8) __U,
+      __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, -(__v8df) __C),
+      (__v8df) __A);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8) __U,
+      __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, -(__v8df) __C),
+      (__v8df) __C);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
-                                                     (__v8df) __B,
-                                                     -(__v8df) __C,
-                                                     (__mmask8) __U,
-                                                     _MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8) __U,
+      __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, -(__v8df) __C),
+      (__v8df) _mm512_setzero_pd());
 }
 
 #define _mm512_fmadd_round_ps(A, B, C, R) \
@@ -2706,124 +2718,136 @@ _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
                                             (__mmask16)(U), (int)(R)))
 
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   (__v16sf) __C,
-                                                   (__mmask16) -1,
-                                                   _MM_FROUND_CUR_DIRECTION);
+  return (__m512 ) __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   (__v16sf) __C,
-                                                   (__mmask16) __U,
-                                                   _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_selectps_512(
+      (__mmask16)__U,
+      __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+      (__v16sf)__A);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
-                                                    (__v16sf) __B,
-                                                    (__v16sf) __C,
-                                                    (__mmask16) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_selectps_512(
+      (__mmask16)__U,
+      __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+      (__v16sf)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
-                                                    (__v16sf) __B,
-                                                    (__v16sf) __C,
-                                                    (__mmask16) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_selectps_512(
+      (__mmask16)__U,
+      __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+      (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   -(__v16sf) __C,
-                                                   (__mmask16) -1,
-                                                   _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   -(__v16sf) __C,
-                                                   (__mmask16) __U,
-                                                   _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_selectps_512(
+      (__mmask16)__U,
+      __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+      (__v16sf)__A);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_selectps_512(
+      (__mmask16)__U,
+      __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+      (__v16sf)__C);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
-                                                    (__v16sf) __B,
-                                                    -(__v16sf) __C,
-                                                    (__mmask16) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_selectps_512(
+      (__mmask16)__U,
+      __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+      (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   -(__v16sf) __B,
-                                                   (__v16sf) __C,
-                                                   (__mmask16) -1,
-                                                   _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_selectps_512(
+      (__mmask16)__U,
+      __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+      (__v16sf)__A);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
-                                                    (__v16sf) __B,
-                                                    (__v16sf) __C,
-                                                    (__mmask16) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_selectps_512(
+      (__mmask16)__U,
+      __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+      (__v16sf)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
-                                                    (__v16sf) __B,
-                                                    (__v16sf) __C,
-                                                    (__mmask16) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_selectps_512(
+      (__mmask16)__U,
+      __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+      (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   -(__v16sf) __B,
-                                                   -(__v16sf) __C,
-                                                   (__mmask16) -1,
-                                                   _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_selectps_512(
+      (__mmask16)__U,
+      __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+      (__v16sf)__A);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_selectps_512(
+      (__mmask16)__U,
+      __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+      (__v16sf)__C);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
-                                                    (__v16sf) __B,
-                                                    -(__v16sf) __C,
-                                                    (__mmask16) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_selectps_512(
+      (__mmask16)__U,
+      __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+      (__v16sf)_mm512_setzero_ps());
 }
 
 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
@@ -3071,15 +3095,6 @@ _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
                                              (__mmask8)(U), (int)(R)))
 
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
-  return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
-                                                    (__v8df) __B,
-                                                    (__v8df) __C,
-                                                    (__mmask8) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
-}
 
 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
   ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
@@ -3087,16 +3102,6 @@ _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
                                          ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/156385
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to