================
@@ -2501,125 +2501,124 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512
__B) {
-(__v8df)(__m512d)(C), \
(__mmask8)(U), (int)(R)))
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
+ return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
+ (__v8df)__C);
+}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
-{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8)__U,
+ __builtin_elementwise_fma((__v8df)__A, (__v8df)__B, (__v8df)__C),
+ (__v8df)__A);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
-{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8)__U,
+ __builtin_elementwise_fma((__v8df)__A, (__v8df)__B, (__v8df)__C),
+ (__v8df)__C);
----------------
ckoparkar wrote:
Good idea to reuse the base version.
Doing this for the 128-bit and 256-bit intrinsics generates a lot of errors; the
512-bit ones are fine. Is this expected, and is it the reason why the base
intrinsic wasn't reused anywhere in `avx512vlintrin.h`? This is the error
message I get for all of them:
`always_inline function '_mm_fmadd_pd' requires target feature 'evex512', but
would be inlined into function '_mm_mask_fmadd_pd' that is compiled without
support for 'evex512'`
I don't fully understand this, but
https://github.com/llvm/llvm-project/issues/70002#issuecomment-1801398114
suggests that `-march=native` could cause this. I don't see the test using this
flag; it is compiled with:
`// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s
-triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl
-emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s`.
https://github.com/llvm/llvm-project/pull/156385
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits