Hello, This patch introduces missing AVX-512PF intrinsics and tests. It also renames store/load intrinsics according to EAS.
gcc/ * config/i386/avx512fintrin.h (_mm512_loadu_si512): Rename. (_mm512_storeu_si512): Ditto. * config/i386/avx512pfintrin.h (_mm512_mask_prefetch_i32gather_pd): New. (_mm512_mask_prefetch_i64gather_pd): Ditto. (_mm512_prefetch_i32scatter_pd): Ditto. (_mm512_mask_prefetch_i32scatter_pd): Ditto. (_mm512_prefetch_i64scatter_pd): Ditto. (_mm512_mask_prefetch_i64scatter_pd): Ditto. (_mm512_mask_prefetch_i32gather_ps): Fix operand type. (_mm512_mask_prefetch_i64gather_ps): Ditto. (_mm512_prefetch_i32scatter_ps): Ditto. (_mm512_mask_prefetch_i32scatter_ps): Ditto. (_mm512_prefetch_i64scatter_ps): Ditto. (_mm512_mask_prefetch_i64scatter_ps): Ditto. * config/i386/i386-builtin-types.def: Define VOID_FTYPE_QI_V8SI_PCINT64_INT_INT and VOID_FTYPE_QI_V8DI_PCINT64_INT_INT. * config/i386/i386.c (ix86_builtins): Define IX86_BUILTIN_GATHERPFQPD, IX86_BUILTIN_GATHERPFDPD, IX86_BUILTIN_SCATTERPFDPD, IX86_BUILTIN_SCATTERPFQPD. (ix86_init_mmx_sse_builtins): Define __builtin_ia32_gatherpfdpd, __builtin_ia32_gatherpfdps, __builtin_ia32_gatherpfqpd, __builtin_ia32_gatherpfqps, __builtin_ia32_scatterpfdpd, __builtin_ia32_scatterpfdps, __builtin_ia32_scatterpfqpd, __builtin_ia32_scatterpfqps. (ix86_expand_builtin): Expand new built-ins. * config/i386/sse.md (avx512pf_gatherpf<mode>): Add SF suffix, fix memory access data type. (*avx512pf_gatherpf<mode>_mask): Ditto. (*avx512pf_gatherpf<mode>): Ditto. (avx512pf_scatterpf<mode>): Ditto. (*avx512pf_scatterpf<mode>_mask): Ditto. (*avx512pf_scatterpf<mode>): Ditto. (avx512pf_gatherpf<mode>df): New. (*avx512pf_gatherpf<mode>df_mask): Ditto. (*avx512pf_gatherpf<mode>df): Ditto. (avx512pf_scatterpf<mode>df): Ditto. (*avx512pf_scatterpf<mode>df_mask): Ditto. (*avx512pf_scatterpf<mode>df): Ditto. testsuite/ * gcc.target/i386/avx512f-vmovdqu32-1.c: Fix intrinsic name. * gcc.target/i386/avx512f-vmovdqu32-2.c: Ditto. * gcc.target/i386/avx512f-vpcmpd-2.c: Ditto. * gcc.target/i386/avx512f-vpcmpq-2.c: Ditto. * gcc.target/i386/avx512f-vpcmpud-2.c: Ditto. 
* gcc.target/i386/avx512f-vpcmpuq-2.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf0dpd-1.c: New. * gcc.target/i386/avx512pf-vgatherpf0qpd-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf1dpd-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf1qpd-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf0dpd-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf0qpd-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf1dpd-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf1qpd-1.c: Ditto. * gcc.target/i386/sse-14.c: Add new built-ins, fix AVX-512ER built-ins rounding immediate. * gcc.target/i386/sse-22.c: Add new built-ins. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/avx-1.c: Ditto. I have doubts about the changes to sse.md. I've split the existing (SF-only) patterns into two: DF and SF. Since the insn operands and the final instruction carry no such data-type distinction, I put the data type on the (mem:..) part. So we now have this (for SF): (define_expand "avx512pf_scatterpf<mode>sf" [(unspec [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand") (mem:SF ... instead of this: (define_expand "avx512pf_scatterpf<mode>" [(unspec [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand") (mem:<ssescalarmode> ... I am not sure whether this (DI/SI) mode for mem is needed. Moreover, I am not sure what that data type represents. The patch is at the bottom. AVX* and SSE* tests pass. Any comments, or is it OK for trunk? 
-- Thanks, K --- gcc/config/i386/avx512fintrin.h | 4 +- gcc/config/i386/avx512pfintrin.h | 113 ++++++++++++-- gcc/config/i386/i386-builtin-types.def | 2 + gcc/config/i386/i386.c | 37 ++++- gcc/config/i386/sse.md | 171 +++++++++++++++++++-- gcc/testsuite/gcc.target/i386/avx-1.c | 4 + .../gcc.target/i386/avx512f-vmovdqu32-1.c | 4 +- .../gcc.target/i386/avx512f-vmovdqu32-2.c | 4 +- gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c | 4 +- gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c | 4 +- gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c | 4 +- gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c | 4 +- .../gcc.target/i386/avx512pf-vgatherpf0dpd-1.c | 15 ++ .../gcc.target/i386/avx512pf-vgatherpf0qpd-1.c | 15 ++ .../gcc.target/i386/avx512pf-vgatherpf1dpd-1.c | 15 ++ .../gcc.target/i386/avx512pf-vgatherpf1qpd-1.c | 15 ++ .../gcc.target/i386/avx512pf-vscatterpf0dpd-1.c | 17 ++ .../gcc.target/i386/avx512pf-vscatterpf0qpd-1.c | 17 ++ .../gcc.target/i386/avx512pf-vscatterpf1dpd-1.c | 17 ++ .../gcc.target/i386/avx512pf-vscatterpf1qpd-1.c | 17 ++ gcc/testsuite/gcc.target/i386/sse-14.c | 40 ++--- gcc/testsuite/gcc.target/i386/sse-22.c | 5 + gcc/testsuite/gcc.target/i386/sse-23.c | 4 + 23 files changed, 469 insertions(+), 63 deletions(-) diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 26f8cb6..4e94174 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -5570,7 +5570,7 @@ _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A) extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_loadu_si512 (void const *__P) +_mm512_loadu_epi32 (void const *__P) { return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P, (__v16si) @@ -5599,7 +5599,7 @@ _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_storeu_si512 (void *__P, __m512i __A) 
+_mm512_storeu_epi32 (void *__P, __m512i __A) { __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A, (__mmask16) -1); diff --git a/gcc/config/i386/avx512pfintrin.h b/gcc/config/i386/avx512pfintrin.h index b8c0110..bc7598e 100644 --- a/gcc/config/i386/avx512pfintrin.h +++ b/gcc/config/i386/avx512pfintrin.h @@ -48,74 +48,157 @@ typedef unsigned short __mmask16; #ifdef __OPTIMIZE__ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_prefetch_i32gather_pd (__m256i index, __mmask8 mask, + void *addr, int scale, int hint) +{ + __builtin_ia32_gatherpfdpd (mask, (__v8si) index, (long long const *) addr, + scale, hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_prefetch_i32gather_ps (__m512i index, __mmask16 mask, - int const *addr, int scale, int hint) + void *addr, int scale, int hint) +{ + __builtin_ia32_gatherpfdps (mask, (__v16si) index, (int const *) addr, + scale, hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_prefetch_i64gather_pd (__m512i index, __mmask8 mask, + void *addr, int scale, int hint) { - __builtin_ia32_gatherpfdps (mask, (__v16si) index, addr, scale, hint); + __builtin_ia32_gatherpfqpd (mask, (__v8di) index, (long long const *) addr, + scale, hint); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_prefetch_i64gather_ps (__m512i index, __mmask8 mask, - int const *addr, int scale, int hint) + void *addr, int scale, int hint) { - __builtin_ia32_gatherpfqps (mask, (__v8di) index, addr, scale, hint); + __builtin_ia32_gatherpfqps (mask, (__v8di) index, (int const *) addr, + scale, hint); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_prefetch_i32scatter_ps (int const *addr, __m512i index, int scale, +_mm512_prefetch_i32scatter_pd (void *addr, __m256i index, int 
scale, int hint) { - __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) index, addr, scale, - hint); + __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) index, + (long long const *)addr, scale, hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_prefetch_i32scatter_ps (void *addr, __m512i index, int scale, + int hint) +{ + __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) index, (int const *) addr, + scale, hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_prefetch_i32scatter_pd (void *addr, __mmask8 mask, + __m256i index, int scale, int hint) +{ + __builtin_ia32_scatterpfdpd (mask, (__v8si) index, (long long const *) addr, + scale, hint); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_prefetch_i32scatter_ps (int const *addr, __mmask16 mask, +_mm512_mask_prefetch_i32scatter_ps (void *addr, __mmask16 mask, __m512i index, int scale, int hint) { - __builtin_ia32_scatterpfdps (mask, (__v16si) index, addr, scale, hint); + __builtin_ia32_scatterpfdps (mask, (__v16si) index, (int const *) addr, + scale, hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_prefetch_i64scatter_pd (void *addr, __m512i index, int scale, + int hint) +{ + __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) index, (long long const *) addr, + scale, hint); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_prefetch_i64scatter_ps (int const *addr, __m512i index, int scale, +_mm512_prefetch_i64scatter_ps (void *addr, __m512i index, int scale, int hint) { - __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) index, addr, scale, - hint); + __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) index, (int const *) addr, + scale, hint); +} + +extern __inline void +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_prefetch_i64scatter_pd (void *addr, __mmask16 mask, + __m512i index, int scale, int hint) +{ + __builtin_ia32_scatterpfqpd (mask, (__v8di) index, (long long const *) addr, + scale, hint); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_prefetch_i64scatter_ps (int const *addr, __mmask16 mask, +_mm512_mask_prefetch_i64scatter_ps (void *addr, __mmask16 mask, __m512i index, int scale, int hint) { - __builtin_ia32_scatterpfqps (mask, (__v8di) index, addr, scale, hint); + __builtin_ia32_scatterpfqps (mask, (__v8di) index, (int const *) addr, + scale, hint); } + #else +#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \ + __builtin_ia32_gatherpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX, \ + (long long const *)ADDR, (int)SCALE, (int)HINT) + #define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \ - __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX, \ + __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX, \ (int const *)ADDR, (int)SCALE, (int)HINT) +#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \ + __builtin_ia32_gatherpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \ + (long long const *)ADDR, (int)SCALE, (int)HINT) + #define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \ __builtin_ia32_gatherpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \ (int const *)ADDR, (int)SCALE, (int)HINT) +#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT) \ + __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX, \ + (long long const *)ADDR, (int)SCALE, (int)HINT) + #define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) \ __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \ (int const *)ADDR, (int)SCALE, (int)HINT) +#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \ 
+ __builtin_ia32_scatterpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX, \ + (long long const *)ADDR, (int)SCALE, (int)HINT) + #define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \ __builtin_ia32_scatterpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX, \ (int const *)ADDR, (int)SCALE, (int)HINT) +#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT) \ + __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \ + (long long const *)ADDR, (int)SCALE, (int)HINT) + #define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) \ __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \ (int const *)ADDR, (int)SCALE, (int)HINT) +#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \ + __builtin_ia32_scatterpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \ + (long long const *)ADDR, (int)SCALE, (int)HINT) + #define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \ __builtin_ia32_scatterpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \ (int const *)ADDR, (int)SCALE, (int)HINT) diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index acf2f32..f3c658b 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -733,7 +733,9 @@ DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8SI, V8DI, INT) DEF_FUNCTION_TYPE (VOID, PINT, QI, V8DI, V8SI, INT) DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8DI, V8DI, INT) +DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCINT64, INT, INT) DEF_FUNCTION_TYPE (VOID, HI, V16SI, PCINT, INT, INT) +DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT64, INT, INT) DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT, INT, INT) DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3cda147..547c1f1 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -28323,9 +28323,13 @@ enum ix86_builtins IX86_BUILTIN_SCATTERSIV8DI, /* AVX512PF */ + 
IX86_BUILTIN_GATHERPFQPD, IX86_BUILTIN_GATHERPFDPS, + IX86_BUILTIN_GATHERPFDPD, IX86_BUILTIN_GATHERPFQPS, + IX86_BUILTIN_SCATTERPFDPD, IX86_BUILTIN_SCATTERPFDPS, + IX86_BUILTIN_SCATTERPFQPD, IX86_BUILTIN_SCATTERPFQPS, /* AVX-512ER */ @@ -30855,15 +30859,27 @@ ix86_init_mmx_sse_builtins (void) IX86_BUILTIN_SCATTERDIV8DI); /* AVX512PF */ + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd", + VOID_FTYPE_QI_V8SI_PCINT64_INT_INT, + IX86_BUILTIN_GATHERPFDPD); def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps", VOID_FTYPE_HI_V16SI_PCINT_INT_INT, IX86_BUILTIN_GATHERPFDPS); + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd", + VOID_FTYPE_QI_V8DI_PCINT64_INT_INT, + IX86_BUILTIN_GATHERPFQPD); def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps", VOID_FTYPE_QI_V8DI_PCINT_INT_INT, IX86_BUILTIN_GATHERPFQPS); + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd", + VOID_FTYPE_QI_V8SI_PCINT64_INT_INT, + IX86_BUILTIN_SCATTERPFDPD); def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps", VOID_FTYPE_HI_V16SI_PCINT_INT_INT, IX86_BUILTIN_SCATTERPFDPS); + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd", + VOID_FTYPE_QI_V8DI_PCINT64_INT_INT, + IX86_BUILTIN_SCATTERPFQPD); def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps", VOID_FTYPE_QI_V8DI_PCINT_INT_INT, IX86_BUILTIN_SCATTERPFQPS); @@ -35509,17 +35525,30 @@ addcarryx: case IX86_BUILTIN_SCATTERDIV8DI: icode = CODE_FOR_avx512f_scatterdiv8di; goto scatter_gen; + + case IX86_BUILTIN_GATHERPFDPD: + icode = CODE_FOR_avx512pf_gatherpfv8sidf; + goto vec_prefetch_gen; case IX86_BUILTIN_GATHERPFDPS: - icode = CODE_FOR_avx512pf_gatherpfv16si; + icode = CODE_FOR_avx512pf_gatherpfv16sisf; + goto vec_prefetch_gen; + case IX86_BUILTIN_GATHERPFQPD: + icode = CODE_FOR_avx512pf_gatherpfv8didf; goto vec_prefetch_gen; case IX86_BUILTIN_GATHERPFQPS: - icode = CODE_FOR_avx512pf_gatherpfv8di; + icode = 
CODE_FOR_avx512pf_gatherpfv8disf; + goto vec_prefetch_gen; + case IX86_BUILTIN_SCATTERPFDPD: + icode = CODE_FOR_avx512pf_scatterpfv8sidf; goto vec_prefetch_gen; case IX86_BUILTIN_SCATTERPFDPS: - icode = CODE_FOR_avx512pf_scatterpfv16si; + icode = CODE_FOR_avx512pf_scatterpfv16sisf; + goto vec_prefetch_gen; + case IX86_BUILTIN_SCATTERPFQPD: + icode = CODE_FOR_avx512pf_scatterpfv8didf; goto vec_prefetch_gen; case IX86_BUILTIN_SCATTERPFQPS: - icode = CODE_FOR_avx512pf_scatterpfv8di; + icode = CODE_FOR_avx512pf_scatterpfv8disf; goto vec_prefetch_gen; gather_gen: diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 31e94fe..419c33a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -417,6 +417,7 @@ [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")]) (define_mode_iterator VI48_256 [V8SI V4DI]) (define_mode_iterator VI48_512 [V16SI V8DI]) +(define_mode_iterator VI4_256_8_512 [V8SI V8DI]) ;; Int-float size matches (define_mode_iterator VI4F_128 [V4SI V4SF]) @@ -12492,10 +12493,11 @@ (set_attr "btver2_decode" "vector,vector,vector,vector") (set_attr "mode" "TI")]) -(define_expand "avx512pf_gatherpf<mode>" +;; Packed float variants +(define_expand "avx512pf_gatherpf<mode>sf" [(unspec [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand") - (mem:<ssescalarmode> + (mem:SF (match_par_dup 5 [(match_operand 2 "vsib_address_operand") (match_operand:VI48_512 1 "register_operand") @@ -12509,10 +12511,10 @@ operands[3]), UNSPEC_VSIBADDR); }) -(define_insn "*avx512pf_gatherpf<mode>_mask" +(define_insn "*avx512pf_gatherpf<mode>sf_mask" [(unspec [(match_operand:<avx512fmaskmode> 0 "register_operand" "k") - (match_operator:<ssescalarmode> 5 "vsib_mem_operator" + (match_operator:SF 5 "vsib_mem_operator" [(unspec:P [(match_operand:P 2 "vsib_address_operand" "Tv") (match_operand:VI48_512 1 "register_operand" "v") @@ -12536,10 +12538,10 @@ (set_attr "prefix" "evex") (set_attr "mode" "XI")]) -(define_insn "*avx512pf_gatherpf<mode>" 
+(define_insn "*avx512pf_gatherpf<mode>sf" [(unspec [(const_int -1) - (match_operator:<ssescalarmode> 4 "vsib_mem_operator" + (match_operator:SF 4 "vsib_mem_operator" [(unspec:P [(match_operand:P 1 "vsib_address_operand" "Tv") (match_operand:VI48_512 0 "register_operand" "v") @@ -12563,10 +12565,83 @@ (set_attr "prefix" "evex") (set_attr "mode" "XI")]) -(define_expand "avx512pf_scatterpf<mode>" +;; Packed double variants +(define_expand "avx512pf_gatherpf<mode>df" [(unspec [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand") - (mem:<ssescalarmode> + (mem:DF + (match_par_dup 5 + [(match_operand 2 "vsib_address_operand") + (match_operand:VI4_256_8_512 1 "register_operand") + (match_operand:SI 3 "const1248_operand")])) + (match_operand:SI 4 "const_0_to_1_operand")] + UNSPEC_GATHER_PREFETCH)] + "TARGET_AVX512PF" +{ + operands[5] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1], + operands[3]), UNSPEC_VSIBADDR); +}) + +(define_insn "*avx512pf_gatherpf<mode>df_mask" + [(unspec + [(match_operand:<avx512fmaskmode> 0 "register_operand" "k") + (match_operator:DF 5 "vsib_mem_operator" + [(unspec:P + [(match_operand:P 2 "vsib_address_operand" "Tv") + (match_operand:VI4_256_8_512 1 "register_operand" "v") + (match_operand:SI 3 "const1248_operand" "n")] + UNSPEC_VSIBADDR)]) + (match_operand:SI 4 "const_0_to_1_operand" "n")] + UNSPEC_GATHER_PREFETCH)] + "TARGET_AVX512PF" +{ + switch (INTVAL (operands[4])) + { + case 0: + return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}"; + case 1: + return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sse") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +(define_insn "*avx512pf_gatherpf<mode>df" + [(unspec + [(const_int -1) + (match_operator:DF 4 "vsib_mem_operator" + [(unspec:P + [(match_operand:P 1 "vsib_address_operand" "Tv") + (match_operand:VI4_256_8_512 0 "register_operand" "v") + (match_operand:SI 2 "const1248_operand" "n")] 
+ UNSPEC_VSIBADDR)]) + (match_operand:SI 3 "const_0_to_1_operand" "n")] + UNSPEC_GATHER_PREFETCH)] + "TARGET_AVX512PF" +{ + switch (INTVAL (operands[3])) + { + case 0: + return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}"; + case 1: + return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sse") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +;; Packed float variants +(define_expand "avx512pf_scatterpf<mode>sf" + [(unspec + [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand") + (mem:SF (match_par_dup 5 [(match_operand 2 "vsib_address_operand") (match_operand:VI48_512 1 "register_operand") @@ -12580,10 +12655,10 @@ operands[3]), UNSPEC_VSIBADDR); }) -(define_insn "*avx512pf_scatterpf<mode>_mask" +(define_insn "*avx512pf_scatterpf<mode>sf_mask" [(unspec [(match_operand:<avx512fmaskmode> 0 "register_operand" "k") - (match_operator:<ssescalarmode> 5 "vsib_mem_operator" + (match_operator:SF 5 "vsib_mem_operator" [(unspec:P [(match_operand:P 2 "vsib_address_operand" "Tv") (match_operand:VI48_512 1 "register_operand" "v") @@ -12607,10 +12682,10 @@ (set_attr "prefix" "evex") (set_attr "mode" "XI")]) -(define_insn "*avx512pf_scatterpf<mode>" +(define_insn "*avx512pf_scatterpf<mode>sf" [(unspec [(const_int -1) - (match_operator:<ssescalarmode> 4 "vsib_mem_operator" + (match_operator:SF 4 "vsib_mem_operator" [(unspec:P [(match_operand:P 1 "vsib_address_operand" "Tv") (match_operand:VI48_512 0 "register_operand" "v") @@ -12634,6 +12709,78 @@ (set_attr "prefix" "evex") (set_attr "mode" "XI")]) +;; Packed double variants +(define_expand "avx512pf_scatterpf<mode>df" + [(unspec + [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand") + (mem:DF + (match_par_dup 5 + [(match_operand 2 "vsib_address_operand") + (match_operand:VI4_256_8_512 1 "register_operand") + (match_operand:SI 3 "const1248_operand")])) + (match_operand:SI 4 "const_0_to_1_operand")] + UNSPEC_SCATTER_PREFETCH)] + "TARGET_AVX512PF" +{ 
+ operands[5] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1], + operands[3]), UNSPEC_VSIBADDR); +}) + +(define_insn "*avx512pf_scatterpf<mode>df_mask" + [(unspec + [(match_operand:<avx512fmaskmode> 0 "register_operand" "k") + (match_operator:DF 5 "vsib_mem_operator" + [(unspec:P + [(match_operand:P 2 "vsib_address_operand" "Tv") + (match_operand:VI4_256_8_512 1 "register_operand" "v") + (match_operand:SI 3 "const1248_operand" "n")] + UNSPEC_VSIBADDR)]) + (match_operand:SI 4 "const_0_to_1_operand" "n")] + UNSPEC_SCATTER_PREFETCH)] + "TARGET_AVX512PF" +{ + switch (INTVAL (operands[4])) + { + case 0: + return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}"; + case 1: + return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sse") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +(define_insn "*avx512pf_scatterpf<mode>df" + [(unspec + [(const_int -1) + (match_operator:DF 4 "vsib_mem_operator" + [(unspec:P + [(match_operand:P 1 "vsib_address_operand" "Tv") + (match_operand:VI4_256_8_512 0 "register_operand" "v") + (match_operand:SI 2 "const1248_operand" "n")] + UNSPEC_VSIBADDR)]) + (match_operand:SI 3 "const_0_to_1_operand" "n")] + UNSPEC_SCATTER_PREFETCH)] + "TARGET_AVX512PF" +{ + switch (INTVAL (operands[3])) + { + case 0: + return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}"; + case 1: + return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sse") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index 12674ad..8fb6fb88 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -362,6 +362,10 @@ #define __builtin_ia32_gatherpfqps(A, B, C, D, E) 
__builtin_ia32_gatherpfqps(A, B, C, 1, 1) #define __builtin_ia32_scatterpfdps(A, B, C, D, E) __builtin_ia32_scatterpfdps(A, B, C, 1, 1) #define __builtin_ia32_scatterpfqps(A, B, C, D, E) __builtin_ia32_scatterpfqps(A, B, C, 1, 1) +#define __builtin_ia32_gatherpfdpd(A, B, C, D, E) __builtin_ia32_gatherpfdpd(A, B, C, 1, 1) +#define __builtin_ia32_gatherpfqpd(A, B, C, D, E) __builtin_ia32_gatherpfqpd(A, B, C, 1, 1) +#define __builtin_ia32_scatterpfdpd(A, B, C, D, E) __builtin_ia32_scatterpfdpd(A, B, C, 1, 1) +#define __builtin_ia32_scatterpfqpd(A, B, C, D, E) __builtin_ia32_scatterpfqpd(A, B, C, 1, 1) /* shaintrin.h */ #define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1) diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c index 79dbf9d..66e358a 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c @@ -15,10 +15,10 @@ volatile __mmask16 m; void extern avx512f_test (void) { - x = _mm512_loadu_si512 (p); + x = _mm512_loadu_epi32 (p); x = _mm512_mask_loadu_epi32 (x, m, p); x = _mm512_maskz_loadu_epi32 (m, p); - _mm512_storeu_si512 (p, x); + _mm512_storeu_epi32 (p, x); _mm512_mask_storeu_epi32 (p, m, x); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-2.c index f1ae73c..0333d31 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-2.c @@ -33,8 +33,8 @@ TEST (void) } #if AVX512F_LEN == 512 - res1.x = _mm512_loadu_si512 (s1.a); - _mm512_storeu_si512 (res2.a, s2.x); + res1.x = _mm512_loadu_epi32 (s1.a); + _mm512_storeu_epi32 (res2.a, s2.x); #endif res3.x = INTRINSIC (_mask_loadu_epi32) (res3.x, mask, s1.a); res4.x = INTRINSIC (_maskz_loadu_epi32) (mask, s1.a); diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c index 
600dfd2..c044f42 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c @@ -17,8 +17,8 @@ { \ dst_ref = ((rel) << i) | dst_ref; \ } \ - source1.x = _mm512_loadu_si512 (s1); \ - source2.x = _mm512_loadu_si512 (s2); \ + source1.x = _mm512_loadu_epi32 (s1); \ + source2.x = _mm512_loadu_epi32 (s2); \ dst1 = _mm512_cmp_epi32_mask (source1.x, source2.x, imm);\ dst2 = _mm512_mask_cmp_epi32_mask (mask, source1.x, source2.x, imm);\ if (dst_ref != dst1) abort(); \ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c index 2a9ceb6..e3a90d8 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c @@ -18,8 +18,8 @@ __mmask8 dst_ref; { \ dst_ref = ((rel) << i) | dst_ref; \ } \ - source1.x = _mm512_loadu_si512 (s1); \ - source2.x = _mm512_loadu_si512 (s2); \ + source1.x = _mm512_loadu_epi32 (s1); \ + source2.x = _mm512_loadu_epi32 (s2); \ dst1 = _mm512_cmp_epi64_mask (source1.x, source2.x, imm);\ dst2 = _mm512_mask_cmp_epi64_mask (mask, source1.x, source2.x, imm);\ if (dst_ref != dst1) abort(); \ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c index c0bb978..a90baf9 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c @@ -17,8 +17,8 @@ { \ dst_ref = ((rel) << i) | dst_ref; \ } \ - source1.x = _mm512_loadu_si512 (s1); \ - source2.x = _mm512_loadu_si512 (s2); \ + source1.x = _mm512_loadu_epi32 (s1); \ + source2.x = _mm512_loadu_epi32 (s2); \ dst1 = _mm512_cmp_epu32_mask (source1.x, source2.x, imm);\ dst2 = _mm512_mask_cmp_epu32_mask (mask, source1.x, source2.x, imm);\ if (dst_ref != dst1) abort(); \ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c index 3bd1b86..c49f5e4 100644 --- 
a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c @@ -17,8 +17,8 @@ { \ dst_ref = ((rel) << i) | dst_ref; \ } \ - source1.x = _mm512_loadu_si512 (s1); \ - source2.x = _mm512_loadu_si512 (s2); \ + source1.x = _mm512_loadu_epi32 (s1); \ + source2.x = _mm512_loadu_epi32 (s2); \ dst1 = _mm512_cmp_epu64_mask (source1.x, source2.x, imm);\ dst2 = _mm512_mask_cmp_epu64_mask (mask, source1.x, source2.x, imm);\ if (dst_ref != dst1) abort(); \ diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c new file mode 100644 index 0000000..1368b7a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512pf -O2" } */ +/* { dg-final { scan-assembler-times "vgatherpf0dpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */ + +#include <immintrin.h> + +volatile __m256i idx; +volatile __mmask8 m8; +void *base; + +void extern +avx512pf_test (void) +{ + _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, 0); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c new file mode 100644 index 0000000..61a81bb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512pf -O2" } */ +/* { dg-final { scan-assembler-times "vgatherpf0qpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */ + +#include <immintrin.h> + +volatile __m512i idx; +volatile __mmask8 m8; +int *base; + +void extern +avx512pf_test (void) +{ + _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, 0); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c new file mode 100644 index 0000000..5bc7599 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c @@ -0,0 +1,15 @@ +/* { dg-do 
compile } */ +/* { dg-options "-mavx512pf -O2" } */ +/* { dg-final { scan-assembler-times "vgatherpf1dpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */ + +#include <immintrin.h> + +volatile __m256i idx; +volatile __mmask8 m8; +int *base; + +void extern +avx512pf_test (void) +{ + _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, 1); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c new file mode 100644 index 0000000..96610db --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512pf -O2" } */ +/* { dg-final { scan-assembler-times "vgatherpf1qpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */ + +#include <immintrin.h> + +volatile __m512i idx; +volatile __mmask8 m8; +int *base; + +void extern +avx512pf_test (void) +{ + _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, 1); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dpd-1.c new file mode 100644 index 0000000..83c31cc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dpd-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512pf -O2" } */ +/* { dg-final { scan-assembler-times "vscatterpf0dpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 2 } } */ +/* { dg-final { scan-assembler-times "vscatterpf0dpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */ + +#include <immintrin.h> + +volatile __m256i idx; +volatile __mmask8 m8; +void *base; + +void extern +avx512pf_test (void) +{ + _mm512_prefetch_i32scatter_pd (base, idx, 8, 0); + _mm512_mask_prefetch_i32scatter_pd (base, m8, idx, 8, 0); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qpd-1.c new file mode 100644 index 0000000..31172f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qpd-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* 
{ dg-options "-mavx512pf -O2" } */ +/* { dg-final { scan-assembler-times "vscatterpf0qpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 2 } } */ +/* { dg-final { scan-assembler-times "vscatterpf0qpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */ + +#include <immintrin.h> + +volatile __m512i idx; +volatile __mmask8 m8; +void *base; + +void extern +avx512pf_test (void) +{ + _mm512_prefetch_i64scatter_pd (base, idx, 8, 0); + _mm512_mask_prefetch_i64scatter_pd (base, m8, idx, 8, 0); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dpd-1.c new file mode 100644 index 0000000..205505b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dpd-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512pf -O2" } */ +/* { dg-final { scan-assembler-times "vscatterpf1dpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 2 } } */ +/* { dg-final { scan-assembler-times "vscatterpf1dpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */ + +#include <immintrin.h> + +volatile __m256i idx; +volatile __mmask8 m8; +void *base; + +void extern +avx512pf_test (void) +{ + _mm512_prefetch_i32scatter_pd (base, idx, 8, 1); + _mm512_mask_prefetch_i32scatter_pd (base, m8, idx, 8, 1); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qpd-1.c new file mode 100644 index 0000000..64d7dfa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qpd-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512pf -O2" } */ +/* { dg-final { scan-assembler-times "vscatterpf1qpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 2 } } */ +/* { dg-final { scan-assembler-times "vscatterpf1qpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */ + +#include <immintrin.h> + +volatile __m512i idx; +volatile __mmask8 m8; +int *base; + +void extern +avx512pf_test (void) +{ + _mm512_prefetch_i64scatter_pd (base, idx, 8, 1); + _mm512_mask_prefetch_i64scatter_pd (base, m8, idx, 8, 1); +} diff --git 
a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index c5d8876..643eb99 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -523,26 +523,30 @@ test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1 test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, 1, 1) test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, 1) test_3vx (_mm512_mask_prefetch_i64scatter_ps, void const *, __mmask8, __m512i, 1, 1) +test_3vx (_mm512_mask_prefetch_i32gather_pd, __m256i, __mmask8, void const *, 1, 1) +test_3vx (_mm512_mask_prefetch_i32scatter_pd, void const *, __mmask8, __m256i, 1, 1) +test_3vx (_mm512_mask_prefetch_i64gather_pd, __m512i, __mmask8, void const *, 1, 1) +test_3vx (_mm512_mask_prefetch_i64scatter_pd, void const *, __mmask8, __m512i, 1, 1) /* avx512erintrin.h */ -test_1 (_mm512_exp2a23_round_pd, __m512d, __m512d, 1) -test_1 (_mm512_exp2a23_round_ps, __m512, __m512, 1) -test_1 (_mm512_rcp28_round_pd, __m512d, __m512d, 1) -test_1 (_mm512_rcp28_round_ps, __m512, __m512, 1) -test_1 (_mm512_rsqrt28_round_pd, __m512d, __m512d, 1) -test_1 (_mm512_rsqrt28_round_ps, __m512, __m512, 1) -test_2 (_mm512_maskz_exp2a23_round_pd, __m512d, __mmask8, __m512d, 1) -test_2 (_mm512_maskz_exp2a23_round_ps, __m512, __mmask16, __m512, 1) -test_2 (_mm512_maskz_rcp28_round_pd, __m512d, __mmask8, __m512d, 1) -test_2 (_mm512_maskz_rcp28_round_ps, __m512, __mmask16, __m512, 1) -test_2 (_mm512_maskz_rsqrt28_round_pd, __m512d, __mmask8, __m512d, 1) -test_2 (_mm512_maskz_rsqrt28_round_ps, __m512, __mmask16, __m512, 1) -test_3 (_mm512_mask_exp2a23_round_pd, __m512d, __m512d, __mmask8, __m512d, 1) -test_3 (_mm512_mask_exp2a23_round_ps, __m512, __m512, __mmask16, __m512, 1) -test_3 (_mm512_mask_rcp28_round_pd, __m512d, __m512d, __mmask8, __m512d, 1) -test_3 (_mm512_mask_rcp28_round_ps, __m512, __m512, __mmask16, __m512, 1) -test_3 
(_mm512_mask_rsqrt28_round_pd, __m512d, __m512d, __mmask8, __m512d, 1) -test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 1) +test_1 (_mm512_exp2a23_round_pd, __m512d, __m512d, 5) +test_1 (_mm512_exp2a23_round_ps, __m512, __m512, 5) +test_1 (_mm512_rcp28_round_pd, __m512d, __m512d, 5) +test_1 (_mm512_rcp28_round_ps, __m512, __m512, 5) +test_1 (_mm512_rsqrt28_round_pd, __m512d, __m512d, 5) +test_1 (_mm512_rsqrt28_round_ps, __m512, __m512, 5) +test_2 (_mm512_maskz_exp2a23_round_pd, __m512d, __mmask8, __m512d, 5) +test_2 (_mm512_maskz_exp2a23_round_ps, __m512, __mmask16, __m512, 5) +test_2 (_mm512_maskz_rcp28_round_pd, __m512d, __mmask8, __m512d, 5) +test_2 (_mm512_maskz_rcp28_round_ps, __m512, __mmask16, __m512, 5) +test_2 (_mm512_maskz_rsqrt28_round_pd, __m512d, __mmask8, __m512d, 5) +test_2 (_mm512_maskz_rsqrt28_round_ps, __m512, __mmask16, __m512, 5) +test_3 (_mm512_mask_exp2a23_round_pd, __m512d, __m512d, __mmask8, __m512d, 5) +test_3 (_mm512_mask_exp2a23_round_ps, __m512, __m512, __mmask16, __m512, 5) +test_3 (_mm512_mask_rcp28_round_pd, __m512d, __m512d, __mmask8, __m512d, 5) +test_3 (_mm512_mask_rcp28_round_ps, __m512, __m512, __mmask16, __m512, 5) +test_3 (_mm512_mask_rsqrt28_round_pd, __m512d, __m512d, __mmask8, __m512d, 5) +test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 5) /* shaintrin.h */ test_2 (_mm_sha1rnds4_epu32, __m128i, __m128i, __m128i, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 630c952..7d68be1 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -646,6 +646,11 @@ test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, 1) test_3vx (_mm512_mask_prefetch_i64scatter_ps, void const *, __mmask8, __m512i, 1, 1) +test_3vx (_mm512_mask_prefetch_i32gather_pd, __m256i, __mmask8, void const *, 1, 1) 
+test_3vx (_mm512_mask_prefetch_i32scatter_pd, void const *, __mmask8, __m256i, 1, 1) +test_3vx (_mm512_mask_prefetch_i64gather_pd, __m512i, __mmask8, long long *, 1, 1) +test_3vx (_mm512_mask_prefetch_i64scatter_pd, void const *, __mmask8, __m512i, 1, 1) + /* avx512erintrin.h */ test_1 (_mm512_exp2a23_round_pd, __m512d, __m512d, 5) test_1 (_mm512_exp2a23_round_ps, __m512, __m512, 5) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 309cd73..77c8d67 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -365,6 +365,10 @@ #define __builtin_ia32_gatherpfqps(A, B, C, D, E) __builtin_ia32_gatherpfqps(A, B, C, 1, 1) #define __builtin_ia32_scatterpfdps(A, B, C, D, E) __builtin_ia32_scatterpfdps(A, B, C, 1, 1) #define __builtin_ia32_scatterpfqps(A, B, C, D, E) __builtin_ia32_scatterpfqps(A, B, C, 1, 1) +#define __builtin_ia32_gatherpfdpd(A, B, C, D, E) __builtin_ia32_gatherpfdpd(A, B, C, 1, 1) +#define __builtin_ia32_gatherpfqpd(A, B, C, D, E) __builtin_ia32_gatherpfqpd(A, B, C, 1, 1) +#define __builtin_ia32_scatterpfdpd(A, B, C, D, E) __builtin_ia32_scatterpfdpd(A, B, C, 1, 1) +#define __builtin_ia32_scatterpfqpd(A, B, C, D, E) __builtin_ia32_scatterpfqpd(A, B, C, 1, 1) /* avx512erintrin.h */ #define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask (A, B, C, 5)