> > + /* Support unified builtin. */ > + || (mask2 == OPTION_MASK_ISA2_AVXVNNI) > > I don't think we gain anything with unified builtins. Better, just > introduce separate builtins, e.g for >
Unified builtins are used for unified intrinsics, intrinsics users may prefer same interface and let compiler decide encoding version. Separate buitins may cause some defination ambiguous when target attribute is used, see avx-vnni-2.c. We also provide separate intrinsics interface for compatibility with different compilers(llvm/msvc/icc). > -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, > CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", > IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) > +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, > OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusd_v8si, > "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, > (int) V8SI_FTYPE_V8SI_V8SI_V8SI) > > add __builtin_ia32_vpdbusd_avx_v8si with the same CODE_FOR. > > This will remove the need for: > > + if ((((bisa & (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) > + == (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) > + || (bisa2 & OPTION_MASK_ISA2_AVXVNNI) != 0) > + && (((isa & (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) > + == (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) > + || (isa2 & OPTION_MASK_ISA2_AVXVNNI) != 0)) > + { > + isa |= OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL; > + isa2 |= OPTION_MASK_ISA2_AVXVNNI; > + } > > which is already complex with AVX512VL processing. > > +#ifdef __AVXVNNI__ > +#define _mm256_dpbusd_avx_epi32(A, B, C) \ > + _mm256_dpbusd_epi32((A), (B), (C)) > +#define _mm_dpbusd_avx_epi32(A, B, C) \ > + _mm_dpbusd_epi32((A), (B), (C)) > +#define _mm256_dpbusds_avx_epi32(A, B, C) \ > + _mm256_dpbusds_epi32((A), (B), (C)) > +#define _mm_dpbusds_avx_epi32(A, B, C) \ > + _mm_dpbusds_epi32((A), (B), (C)) > +#define _mm256_dpwssd_avx_epi32(A, B, C) \ > + _mm256_dpwssd_epi32((A), (B), (C)) > +#define _mm_dpwssd_avx_epi32(A, B, C) \ > + _mm_dpwssd_epi32((A), (B), (C)) > +#define _mm256_dpwssds_avx_epi32(A, B, C) \ > + _mm256_dpwssds_epi32((A), (B), (C)) > +#define _mm_dpwssds_avx_epi32(A, B, C) \ > + _mm_dpwssds_epi32((A), (B), (C)) > +#endif /* __AVXVNNI__ */ > + > > The above won't be needed with separate builtins. > > Please repost the patch, I think that the following part(s) of the > patch were already committed via another patch: > > @@ -399,8 +403,8 @@ ix86_handle_option (struct gcc_options *opts, > { > opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET; > opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET; > - opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET; > - opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET; > + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX2_UNSET; > + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX2_UNSET; > } > return true; > Yes. > No review for the sse.md and for testcases. > > Uros. Update the patch based on latest trunk. -- BR, Hongtao
From 881868b8c9f5925c63a953454f45f5e0a3c8ea4f Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao....@intel.com> Date: Tue, 13 Oct 2020 16:16:16 +0800 Subject: [PATCH] Support Intel AVX VNNI 2020-10-13 Hongtao Liu <hongtao....@intel.com> Hongyu Wang <hongyu.w...@intel.com> gcc/ * common/config/i386/cpuinfo.h (get_available_features): Detect AVXVNNI. * common/config/i386/i386-common.c (OPTION_MASK_ISA2_AVXVNNI_SET, OPTION_MASK_ISA2_AVXVNNI_UNSET, OPTION_MASK_ISA2_AVX2_UNSET): New. (ix86_hanlde_option): Handle -mavxvnni, unset avxvnni when avx2 is disabled. * common/config/i386/i386-cpuinfo.h (enum processor_features): Add FEATURE_AVXVNNI. * common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY for avxvnni. * config.gcc: Add avxvnniintrin.h. * config/i386/avx512vnniintrin.h: Remove 128/256 bit non-mask intrinsics. * config/i386/avxvnniintrin.h: New header file. * config/i386/cpuid.h (bit_AVXVNNI): New. * config/i386/i386-builtins.c (def_builtin): Handle AVXVNNI mask for unified builtin. * config/i386/i386-builtin.def (BDESC): Adjust AVX512VNNI builtins for AVXVNNI. * config/i386/i386-c.c (ix86_target_macros_internal): Define __AVXVNNI__. * config/i386/i386-expand.c (ix86_expand_builtin): Handle bisa for AVXVNNI to support unified intrinsic name, since there is no dependency between AVX512VNNI and AVXVNNI. * config/i386/i386-options.c (isa2_opts): Add -mavxvnni. (ix86_valid_target_attribute_inner_p): Handle avxnnni. (ix86_option_override_internal): Ditto. * config/i386/i386.h (TARGET_AVXVNNI, TARGET_AVXVNNI_P, TARGET_AVXVNNI_P, PTA_AVXVNNI): New. (PTA_SAPPHIRERAPIDS): Add AVX_VNNI. (PTA_ALDERLAKE): Likewise. * config/i386/i386.md ("isa"): Add avxvnni, avx512vnnivl. ("enabled"): Adjust for avxvnni and avx512vnnivl. * config/i386/i386.opt: Add option -mavxvnni. * config/i386/immintrin.h: Include avxvnniintrin.h. * config/i386/sse.md (vpdpbusd_<mode>): Adjust for AVXVNNI. (vpdpbusds_<mode>): Likewise. (vpdpwssd_<mode>): Likewise. (vpdpwssds_<mode>): Likewise. (vpdpbusd_v16si): New. (vpdpbusds_v16si): Likewise. (vpdpwssd_v16si): Likewise. (vpdpwssds_v16si): Likewise. * doc/invoke.texi: Document -mavxvnni. * doc/extend.texi: Document avxvnni. * doc/sourcebuild.texi: Document target avxvnni. gcc/testsuite/ * gcc.target/i386/avx512vl-vnni-1.c: Rename.. * gcc.target/i386/avx512vl-vnni-1a.c: To This. * gcc.target/i386/avx512vl-vnni-1b.c: New test. * gcc.target/i386/avx512vl-vnni-2.c: Ditto. * gcc.target/i386/avx512vl-vnni-3.c: Ditto. * gcc.target/i386/avx-vnni-1.c: Ditto. * gcc.target/i386/avx-vnni-2.c: Ditto. * gcc.target/i386/avx-vnni-3.c: Ditto. * gcc.target/i386/avx-vnni-4.c: Ditto. * gcc.target/i386/avx-vnni-5.c: Ditto. * gcc.target/i386/avx-vnni-6.c: Ditto. * gcc.target/i386/avx-vpdpbusd-2.c: Ditto. * gcc.target/i386/avx-vpdpbusds-2.c: Ditto. * gcc.target/i386/avx-vpdpwssd-2.c: Ditto. * gcc.target/i386/avx-vpdpwssds-2.c: Ditto. * gcc.target/i386/vnni_inline_error.c: Ditto. * gcc.target/i386/avx512vnnivl-builtin.c: Ditto. * gcc.target/i386/avxvnni-builtin.c: Ditto. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * gcc.target/i386/sse-12.c: Add -mavxvnni. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * g++.dg/other/i386-2.C: Ditto. * g++.dg/other/i386-3.C: Ditto. * lib/target-supports.exp (check_effective_target_avxvnni): New proc. --- gcc/common/config/i386/cpuinfo.h | 2 + gcc/common/config/i386/i386-common.c | 19 +++ gcc/common/config/i386/i386-cpuinfo.h | 1 + gcc/common/config/i386/i386-isas.h | 1 + gcc/config.gcc | 4 +- gcc/config/i386/avx512vnnivlintrin.h | 64 ---------- gcc/config/i386/avxvnniintrin.h | 90 ++++++++++++++ gcc/config/i386/cpuid.h | 1 + gcc/config/i386/i386-builtin.def | 18 +-- gcc/config/i386/i386-builtins.c | 2 + gcc/config/i386/i386-c.c | 2 + gcc/config/i386/i386-expand.c | 13 ++ gcc/config/i386/i386-options.c | 8 +- gcc/config/i386/i386.h | 7 +- gcc/config/i386/i386.md | 5 +- gcc/config/i386/i386.opt | 5 + gcc/config/i386/immintrin.h | 2 + gcc/config/i386/sse.md | 117 +++++++++++++----- gcc/doc/extend.texi | 5 + gcc/doc/invoke.texi | 11 +- gcc/doc/sourcebuild.texi | 3 + gcc/testsuite/g++.dg/other/i386-2.C | 2 +- gcc/testsuite/g++.dg/other/i386-3.C | 2 +- gcc/testsuite/gcc.target/i386/avx-vnni-1.c | 29 +++++ gcc/testsuite/gcc.target/i386/avx-vnni-2.c | 30 +++++ gcc/testsuite/gcc.target/i386/avx-vnni-3.c | 16 +++ gcc/testsuite/gcc.target/i386/avx-vnni-4.c | 16 +++ gcc/testsuite/gcc.target/i386/avx-vnni-5.c | 29 +++++ gcc/testsuite/gcc.target/i386/avx-vnni-6.c | 29 +++++ .../gcc.target/i386/avx-vpdpbusd-2.c | 71 +++++++++++ .../gcc.target/i386/avx-vpdpbusds-2.c | 71 +++++++++++ .../gcc.target/i386/avx-vpdpwssd-2.c | 67 ++++++++++ .../gcc.target/i386/avx-vpdpwssds-2.c | 67 ++++++++++ .../{avx512vl-vnni-1.c => avx512vl-vnni-1a.c} | 0 .../gcc.target/i386/avx512vl-vnni-1b.c | 69 +++++++++++ .../gcc.target/i386/avx512vl-vnni-2.c | 30 +++++ .../gcc.target/i386/avx512vl-vnni-3.c | 47 +++++++ .../gcc.target/i386/avx512vnnivl-builtin.c | 8 ++ .../gcc.target/i386/avxvnni-builtin.c | 8 ++ gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + gcc/testsuite/gcc.target/i386/sse-12.c | 2 +- gcc/testsuite/gcc.target/i386/sse-13.c | 2 +- gcc/testsuite/gcc.target/i386/sse-14.c | 2 +- gcc/testsuite/gcc.target/i386/sse-22.c | 4 +- gcc/testsuite/gcc.target/i386/sse-23.c | 2 +- .../gcc.target/i386/vnni_inline_error.c | 13 ++ gcc/testsuite/lib/target-supports.exp | 12 ++ 47 files changed, 887 insertions(+), 123 deletions(-) create mode 100644 gcc/config/i386/avxvnniintrin.h create mode 100644 gcc/testsuite/gcc.target/i386/avx-vnni-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vnni-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vnni-3.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vnni-4.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vnni-5.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vnni-6.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vpdpbusd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vpdpbusds-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vpdpwssd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vpdpwssds-2.c rename gcc/testsuite/gcc.target/i386/{avx512vl-vnni-1.c => avx512vl-vnni-1a.c} (100%) create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vnni-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vnni-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vnni-3.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vnnivl-builtin.c create mode 100644 gcc/testsuite/gcc.target/i386/avxvnni-builtin.c create mode 100644 gcc/testsuite/gcc.target/i386/vnni_inline_error.c diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h index 7a93e170608..41728a2fceb 100644 --- a/gcc/common/config/i386/cpuinfo.h +++ b/gcc/common/config/i386/cpuinfo.h @@ -713,6 +713,8 @@ get_available_features (struct __processor_model *cpu_model, set_feature (FEATURE_AVX512BF16); if (eax & bit_HRESET) set_feature (FEATURE_HRESET); + if (eax & bit_AVXVNNI) + set_feature (FEATURE_AVXVNNI); } } diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index e29320d68cc..d9b26c9e277 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. If not see (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512F_SET) #define OPTION_MASK_ISA_AVX512VNNI_SET \ (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512F_SET) +#define OPTION_MASK_ISA2_AVXVNNI_SET OPTION_MASK_ISA2_AVXVNNI #define OPTION_MASK_ISA_AVX512VPOPCNTDQ_SET \ (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512F_SET) #define OPTION_MASK_ISA_AVX512BITALG_SET \ @@ -206,6 +207,8 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_XSAVEOPT_UNSET OPTION_MASK_ISA_XSAVEOPT #define OPTION_MASK_ISA_AVX2_UNSET \ (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET) +#define OPTION_MASK_ISA2_AVX2_UNSET \ + (OPTION_MASK_ISA2_AVXVNNI_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET) #define OPTION_MASK_ISA_AVX512F_UNSET \ (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \ | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \ @@ -228,6 +231,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA2_AVX5124VNNIW_UNSET OPTION_MASK_ISA2_AVX5124VNNIW #define OPTION_MASK_ISA_AVX512VBMI2_UNSET OPTION_MASK_ISA_AVX512VBMI2 #define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI +#define OPTION_MASK_ISA2_AVXVNNI_UNSET OPTION_MASK_ISA2_AVXVNNI #define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ #define OPTION_MASK_ISA_AVX512BITALG_UNSET OPTION_MASK_ISA_AVX512BITALG #define OPTION_MASK_ISA2_AVX512BF16_UNSET OPTION_MASK_ISA2_AVX512BF16 @@ -882,6 +886,21 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mavxvnni: + if (value) + { + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXVNNI_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXVNNI_SET; + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET; + } + else + { + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVXVNNI_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXVNNI_UNSET; + } + return true; + case OPT_msgx: if (value) { diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h index 2138220aba2..af02be57812 100644 --- a/gcc/common/config/i386/i386-cpuinfo.h +++ b/gcc/common/config/i386/i386-cpuinfo.h @@ -224,6 +224,7 @@ enum processor_features FEATURE_KL, FEATURE_AESKLE, FEATURE_WIDEKL, + FEATURE_AVXVNNI, CPU_FEATURE_MAX }; diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h index 921db066471..c4fd0363da2 100644 --- a/gcc/common/config/i386/i386-isas.h +++ b/gcc/common/config/i386/i386-isas.h @@ -168,4 +168,5 @@ ISA_NAMES_TABLE_START ISA_NAMES_TABLE_ENTRY("kl", FEATURE_KL, P_NONE, "-mkl") ISA_NAMES_TABLE_ENTRY("aeskle", FEATURE_AESKLE, P_NONE, NULL) ISA_NAMES_TABLE_ENTRY("widekl", FEATURE_WIDEKL, P_NONE, "-mwidekl") + ISA_NAMES_TABLE_ENTRY("avxvnni", FEATURE_AVXVNNI, P_NONE, "-mavxvnni") ISA_NAMES_TABLE_END diff --git a/gcc/config.gcc b/gcc/config.gcc index dc6d68bd4eb..15318b20ce0 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -414,7 +414,7 @@ i[34567]86-*-*) avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h amxbf16intrin.h x86gprintrin.h uintrintrin.h - hresetintrin.h keylockerintrin.h" + hresetintrin.h keylockerintrin.h avxvnniintrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -451,7 +451,7 @@ x86_64-*-*) avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h amxbf16intrin.h x86gprintrin.h uintrintrin.h - hresetintrin.h keylockerintrin.h" + hresetintrin.h keylockerintrin.h avxvnniintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/avx512vnnivlintrin.h b/gcc/config/i386/avx512vnnivlintrin.h index b4a6db37ba4..9118850007b 100644 --- a/gcc/config/i386/avx512vnnivlintrin.h +++ b/gcc/config/i386/avx512vnnivlintrin.h @@ -34,14 +34,6 @@ #define __DISABLE_AVX512VNNIVL__ #endif /* __AVX512VNNIVL__ */ -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_dpbusd_epi32 (__m256i __A, __m256i __B, __m256i __C) -{ - return (__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si)__A, (__v8si) __B, - (__v8si) __C); -} - extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_dpbusd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) @@ -58,14 +50,6 @@ _mm256_maskz_dpbusd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) (__v8si) __C, (__v8si) __D, (__mmask8)__A); } -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_dpbusd_epi32 (__m128i __A, __m128i __B, __m128i __C) -{ - return (__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si)__A, (__v4si) __B, - (__v4si) __C); -} - extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_dpbusd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) @@ -82,14 +66,6 @@ _mm_maskz_dpbusd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) (__v4si) __C, (__v4si) __D, (__mmask8)__A); } -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_dpbusds_epi32 (__m256i __A, __m256i __B, __m256i __C) -{ - return (__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si)__A, (__v8si) __B, - (__v8si) __C); -} - extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_dpbusds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) @@ -107,14 +83,6 @@ _mm256_maskz_dpbusds_epi32 (__mmask8 __A, __m256i __B, __m256i __C, (__v8si) __C, (__v8si) __D, (__mmask8)__A); } -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_dpbusds_epi32 (__m128i __A, __m128i __B, __m128i __C) -{ - return (__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si)__A, (__v4si) __B, - (__v4si) __C); -} - extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_dpbusds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) @@ -131,14 +99,6 @@ _mm_maskz_dpbusds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) (__v4si) __C, (__v4si) __D, (__mmask8)__A); } -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_dpwssd_epi32 (__m256i __A, __m256i __B, __m256i __C) -{ - return (__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si)__A, (__v8si) __B, - (__v8si) __C); -} - extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_dpwssd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) @@ -155,14 +115,6 @@ _mm256_maskz_dpwssd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) (__v8si) __C, (__v8si) __D, (__mmask8)__A); } -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_dpwssd_epi32 (__m128i __A, __m128i __B, __m128i __C) -{ - return (__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si)__A, (__v4si) __B, - (__v4si) __C); -} - extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_dpwssd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) @@ -179,14 +131,6 @@ _mm_maskz_dpwssd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) (__v4si) __C, (__v4si) __D, (__mmask8)__A); } -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_dpwssds_epi32 (__m256i __A, __m256i __B, __m256i __C) -{ - return (__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si)__A, (__v8si) __B, - (__v8si) __C); -} - extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_dpwssds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) @@ -204,14 +148,6 @@ _mm256_maskz_dpwssds_epi32 (__mmask8 __A, __m256i __B, __m256i __C, (__v8si) __C, (__v8si) __D, (__mmask8)__A); } -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_dpwssds_epi32 (__m128i __A, __m128i __B, __m128i __C) -{ - return (__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si)__A, (__v4si) __B, - (__v4si) __C); -} - extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_dpwssds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) diff --git a/gcc/config/i386/avxvnniintrin.h b/gcc/config/i386/avxvnniintrin.h new file mode 100644 index 00000000000..9f815c98b96 --- /dev/null +++ b/gcc/config/i386/avxvnniintrin.h @@ -0,0 +1,90 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use <avxvnniintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _AVXVNNIINTRIN_H_INCLUDED +#define _AVXVNNIINTRIN_H_INCLUDED + +#ifdef __AVXVNNI__ +#define _mm256_dpbusd_avx_epi32(A, B, C) \ + _mm256_dpbusd_epi32((A), (B), (C)) +#define _mm_dpbusd_avx_epi32(A, B, C) \ + _mm_dpbusd_epi32((A), (B), (C)) +#define _mm256_dpbusds_avx_epi32(A, B, C) \ + _mm256_dpbusds_epi32((A), (B), (C)) +#define _mm_dpbusds_avx_epi32(A, B, C) \ + _mm_dpbusds_epi32((A), (B), (C)) +#define _mm256_dpwssd_avx_epi32(A, B, C) \ + _mm256_dpwssd_epi32((A), (B), (C)) +#define _mm_dpwssd_avx_epi32(A, B, C) \ + _mm_dpwssd_epi32((A), (B), (C)) +#define _mm256_dpwssds_avx_epi32(A, B, C) \ + _mm256_dpwssds_epi32((A), (B), (C)) +#define _mm_dpwssds_avx_epi32(A, B, C) \ + _mm_dpwssds_epi32((A), (B), (C)) +#endif /* __AVXVNNI__ */ + +#define _mm256_dpbusd_epi32(A, B, C) \ + ((__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) (A), \ + (__v8si) (B), \ + (__v8si) (C))) + +#define _mm_dpbusd_epi32(A, B, C) \ + ((__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) (A), \ + (__v4si) (B), \ + (__v4si) (C))) + +#define _mm256_dpbusds_epi32(A, B, C) \ + ((__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) (A), \ + (__v8si) (B), \ + (__v8si) (C))) + +#define _mm_dpbusds_epi32(A, B, C) \ + ((__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) (A), \ + (__v4si) (B), \ + (__v4si) (C))) + +#define _mm256_dpwssd_epi32(A, B, C) \ + ((__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) (A), \ + (__v8si) (B), \ + (__v8si) (C))) + +#define _mm_dpwssd_epi32(A, B, C) \ + ((__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) (A), \ + (__v4si) (B), \ + (__v4si) (C))) + +#define _mm256_dpwssds_epi32(A, B, C) \ + ((__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) (A), \ + (__v8si) (B), \ + (__v8si) (C))) + +#define _mm_dpwssds_epi32(A, B, C) \ + ((__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) (A), \ + (__v4si) (B), \ + (__v4si) (C))) + +#endif /* _AVXVNNIINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 595b4238ba5..d2d42f71a63 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -25,6 +25,7 @@ #define _CPUID_H_INCLUDED /* %eax */ +#define bit_AVXVNNI (1 << 4) #define bit_AVX512BF16 (1 << 5) #define bit_HRESET (1 << 22) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 4d38ceab087..67d5f2efc74 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2626,45 +2626,45 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512B BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8mulb_v16qi, "__builtin_ia32_vgf2p8mulb_v16qi", IX86_BUILTIN_VGF2P8MULB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8mulb_v16qi_mask, "__builtin_ia32_vgf2p8mulb_v16qi_mask", IX86_BUILTIN_VGF2P8MULB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI) -/* VNNI */ +/* AVX512_VNNI */ BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si, "__builtin_ia32_vpdpbusd_v4si", IX86_BUILTIN_VPDPBUSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusd_v4si, "__builtin_ia32_vpdpbusd_v4si", IX86_BUILTIN_VPDPBUSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si, "__builtin_ia32_vpdpbusds_v4si", IX86_BUILTIN_VPDPBUSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusds_v4si, "__builtin_ia32_vpdpbusds_v4si", IX86_BUILTIN_VPDPBUSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si, "__builtin_ia32_vpdpwssd_v4si", IX86_BUILTIN_VPDPWSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssd_v4si, "__builtin_ia32_vpdpwssd_v4si", IX86_BUILTIN_VPDPWSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si, "__builtin_ia32_vpdpwssds_v4si", IX86_BUILTIN_VPDPWSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssds_v4si, "__builtin_ia32_vpdpwssds_v4si", IX86_BUILTIN_VPDPWSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c index 504987a5410..b9f6289c869 100644 --- a/gcc/config/i386/i386-builtins.c +++ b/gcc/config/i386/i386-builtins.c @@ -274,6 +274,8 @@ def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0) && (mask == 0 || (mask & ix86_isa_flags) != 0)) || ((mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE) + /* Support unified builtin. */ + || (mask2 == OPTION_MASK_ISA2_AVXVNNI) || (lang_hooks.builtin_function == lang_hooks.builtin_function_ext_scope)) { diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 3299a566746..87b3a2bf143 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -606,6 +606,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__KL__"); if (isa_flag2 & OPTION_MASK_ISA2_WIDEKL) def_or_undef (parse_in, "__WIDEKL__"); + if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNI) + def_or_undef (parse_in, "__AVXVNNI__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 6f81b58a08e..795320b4557 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -11059,6 +11059,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4 + (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL) or + OPTION_MASK_ISA2_AVXVNNI where for each such pair it is sufficient if either of the ISAs is enabled, plus if it is ored with other options also those others. OPTION_MASK_ISA_MMX in bisa is satisfied also if TARGET_MMX_WITH_SSE. */ @@ -11077,6 +11079,17 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0) isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4); + if ((((bisa & (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) + == (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) + || (bisa2 & OPTION_MASK_ISA2_AVXVNNI) != 0) + && (((isa & (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) + == (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) + || (isa2 & OPTION_MASK_ISA2_AVXVNNI) != 0)) + { + isa |= OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL; + isa2 |= OPTION_MASK_ISA2_AVXVNNI; + } + if ((bisa & OPTION_MASK_ISA_MMX) && !TARGET_MMX && TARGET_MMX_WITH_SSE /* __builtin_ia32_maskmovq requires MMX registers. */ && fcode != IX86_BUILTIN_MASKMOVQ) diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index 4128e933291..467bed82f47 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -216,7 +216,8 @@ static struct ix86_target_opts isa2_opts[] = { "-muintr", OPTION_MASK_ISA2_UINTR }, { "-mhreset", OPTION_MASK_ISA2_HRESET }, { "-mkl", OPTION_MASK_ISA2_KL }, - { "-mwidekl", OPTION_MASK_ISA2_WIDEKL } + { "-mwidekl", OPTION_MASK_ISA2_WIDEKL }, + { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI } }; static struct ix86_target_opts isa_opts[] = { @@ -1047,6 +1048,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("amx-int8", OPT_mamx_int8), IX86_ATTR_ISA ("amx-bf16", OPT_mamx_bf16), IX86_ATTR_ISA ("hreset", OPT_mhreset), + IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), @@ -2304,6 +2306,10 @@ ix86_option_override_internal (bool main_args_p, && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_AMX_BF16)) opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_BF16; + if (((processor_alias_table[i].flags & PTA_AVXVNNI) != 0) + && !(opts->x_ix86_isa_flags2_explicit + & OPTION_MASK_ISA2_AVXVNNI)) + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXVNNI; if (((processor_alias_table[i].flags & PTA_MOVDIRI) != 0) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVDIRI)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVDIRI; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index d0c157a9970..de4d6ce5280 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -217,6 +217,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_KL_P(x) TARGET_ISA2_KL_P(x) #define TARGET_WIDEKL TARGET_ISA2_WIDEKL #define TARGET_WIDEKL_P(x) TARGET_ISA2_WIDEKL_P(x) +#define TARGET_AVXVNNI TARGET_ISA2_AVXVNNI +#define TARGET_AVXVNNI_P(x) TARGET_ISA2_AVXVNNI_P(x) #define TARGET_LP64 TARGET_ABI_64 #define TARGET_LP64_P(x) TARGET_ABI_64_P(x) @@ -2493,6 +2495,7 @@ const wide_int_bitmask PTA_UINTR (0, HOST_WIDE_INT_1U << 22); const wide_int_bitmask PTA_HRESET(0, HOST_WIDE_INT_1U << 23); const wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24); const wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25); +const wide_int_bitmask PTA_AVXVNNI (0, HOST_WIDE_INT_1U << 26); const wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR; @@ -2537,9 +2540,9 @@ const wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI const wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE - | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR; + | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI; const wide_int_bitmask PTA_ALDERLAKE = PTA_SKYLAKE | PTA_CLDEMOTE | PTA_PTWRITE - | PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL; + | PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD; const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 979e49d4723..80f1ccccf27 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -818,7 +818,8 @@ sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx, avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, avx512bw,noavx512bw,avx512dq,noavx512dq, - avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw" + avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw, + avxvnni,avx512vnnivl" (const_string "base")) ;; Define instruction set of MMX instructions @@ -867,6 +868,8 @@ (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ") (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL") (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL") + (eq_attr "isa" "avxvnni") (symbol_ref "TARGET_AVXVNNI") + (eq_attr "isa" "avx512vnnivl") (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL") (eq_attr "mmx_isa" "native") (symbol_ref "!TARGET_MMX_WITH_SSE") diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 029cacb38e0..fac76e4a985 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1143,3 +1143,8 @@ Support KL built-in functions and code generation. mwidekl Target Report Mask(ISA2_WIDEKL) Var(ix86_isa_flags2) Save Support WIDEKL built-in functions and code generation. + +mavxvnni +Target Report Mask(ISA2_AVXVNNI) Var(ix86_isa_flags2) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and +AVXVNNI built-in functions and code generation. diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 0ce08e5b341..b7879678378 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -42,6 +42,8 @@ #include <avxintrin.h> +#include <avxvnniintrin.h> + #include <avx2intrin.h> #include <avx512fintrin.h> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b153a87fb98..8437ad27087 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -22915,16 +22915,30 @@ [(set_attr ("prefix") ("evex")) (set_attr "mode" "<sseinsnmode>")]) -(define_insn "vpdpbusd_<mode>" - [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") - (unspec:VI4_AVX512VL - [(match_operand:VI4_AVX512VL 1 "register_operand" "0") - (match_operand:VI4_AVX512VL 2 "register_operand" "v") - (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] +(define_insn "vpdpbusd_v16si" + [(set (match_operand:V16SI 0 "register_operand" "=v") + (unspec:V16SI + [(match_operand:V16SI 1 "register_operand" "0") + (match_operand:V16SI 2 "register_operand" "v") + (match_operand:V16SI 3 "nonimmediate_operand" "vm")] UNSPEC_VPMADDUBSWACCD))] "TARGET_AVX512VNNI" - "vpdpbusd\t{%3, %2, %0|%0, %2, %3 }" - [(set_attr ("prefix") ("evex"))]) + "vpdpbusd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("evex"))]) + +(define_insn "vpdpbusd_<mode>" + [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v") + (unspec:VI4_AVX2 + [(match_operand:VI4_AVX2 1 "register_operand" "0,0") + (match_operand:VI4_AVX2 2 "register_operand" "x,v") + (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] + UNSPEC_VPMADDUBSWACCD))] + "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" + "@ + %{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3} + vpdpbusd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("vex,evex")) + (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) (define_insn "vpdpbusd_<mode>_mask" [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") @@ -22969,17 +22983,30 @@ "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }" [(set_attr ("prefix") ("evex"))]) +(define_insn "vpdpbusds_v16si" + [(set (match_operand:V16SI 0 "register_operand" "=v") + (unspec:V16SI + [(match_operand:V16SI 1 "register_operand" "0") + (match_operand:V16SI 2 "register_operand" "v") + (match_operand:V16SI 3 "nonimmediate_operand" "vm")] + UNSPEC_VPMADDUBSWACCSSD))] + "TARGET_AVX512VNNI" + "vpdpbusds\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("evex"))]) (define_insn "vpdpbusds_<mode>" - [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") - (unspec:VI4_AVX512VL - [(match_operand:VI4_AVX512VL 1 "register_operand" "0") - (match_operand:VI4_AVX512VL 2 "register_operand" "v") - (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v") + (unspec:VI4_AVX2 + [(match_operand:VI4_AVX2 1 "register_operand" "0,0") + (match_operand:VI4_AVX2 2 "register_operand" "x,v") + (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] UNSPEC_VPMADDUBSWACCSSD))] - "TARGET_AVX512VNNI" - "vpdpbusds\t{%3, %2, %0|%0, %2, %3 }" - [(set_attr ("prefix") ("evex"))]) + "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" + "@ + %{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3} + vpdpbusds\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("vex,evex")) + (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) (define_insn "vpdpbusds_<mode>_mask" [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") @@ -23024,17 +23051,30 @@ "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }" [(set_attr ("prefix") ("evex"))]) +(define_insn "vpdpwssd_v16si" + [(set (match_operand:V16SI 0 "register_operand" "=v") + (unspec:V16SI + [(match_operand:V16SI 1 "register_operand" "0") + (match_operand:V16SI 2 "register_operand" "v") + (match_operand:V16SI 3 "nonimmediate_operand" "vm")] + UNSPEC_VPMADDWDACCD))] + "TARGET_AVX512VNNI" + "vpdpwssd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("evex"))]) (define_insn "vpdpwssd_<mode>" - [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") - (unspec:VI4_AVX512VL - [(match_operand:VI4_AVX512VL 1 "register_operand" "0") - (match_operand:VI4_AVX512VL 2 "register_operand" "v") - (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v") + (unspec:VI4_AVX2 + [(match_operand:VI4_AVX2 1 "register_operand" "0,0") + (match_operand:VI4_AVX2 2 "register_operand" "x,v") + (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] UNSPEC_VPMADDWDACCD))] - "TARGET_AVX512VNNI" - "vpdpwssd\t{%3, %2, %0|%0, %2, %3 }" - [(set_attr ("prefix") ("evex"))]) + "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" + "@ + %{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3} + vpdpwssd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("vex,evex")) + (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) (define_insn "vpdpwssd_<mode>_mask" [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") @@ -23079,17 +23119,30 @@ "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }" [(set_attr ("prefix") ("evex"))]) +(define_insn "vpdpwssds_v16si" + [(set (match_operand:V16SI 0 "register_operand" "=v") + (unspec:V16SI + [(match_operand:V16SI 1 "register_operand" "0") + (match_operand:V16SI 2 "register_operand" "v") + (match_operand:V16SI 3 "nonimmediate_operand" "vm")] + UNSPEC_VPMADDWDACCSSD))] + "TARGET_AVX512VNNI" + "vpdpwssds\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("evex"))]) (define_insn "vpdpwssds_<mode>" - [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") - (unspec:VI4_AVX512VL - [(match_operand:VI4_AVX512VL 1 "register_operand" "0") - (match_operand:VI4_AVX512VL 2 "register_operand" "v") - (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v") + (unspec:VI4_AVX2 + [(match_operand:VI4_AVX2 1 "register_operand" "0,0") + (match_operand:VI4_AVX2 2 "register_operand" "x,v") + (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] UNSPEC_VPMADDWDACCSSD))] - "TARGET_AVX512VNNI" - "vpdpwssds\t{%3, %2, %0|%0, %2, %3 }" - [(set_attr ("prefix") ("evex"))]) + "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" + "@ + %{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3} + vpdpwssds\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("vex,evex")) + (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) (define_insn "vpdpwssds_<mode>_mask" [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 5f1e3bf8a2e..420a14b66b0 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -6750,6 +6750,11 @@ Enable/disable the generation of the KEYLOCKER instructions. @cindex @code{target("widekl")} function attribute, x86 Enable/disable the generation of the WIDEKL instructions. +@item avxvnni +@itemx no-avxvnni +@cindex @code{target("avxvnni")} function attribute, x86 +Enable/disable the generation of the AVXVNNI instructions. + @item cld @itemx no-cld @cindex @code{target("cld")} function attribute, x86 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index d2a188d7c75..347db9d36a3 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1367,7 +1367,7 @@ See RS/6000 and PowerPC Options. -mvpclmulqdq -mavx512bitalg -mmovdiri -mmovdir64b -mavx512vpopcntdq @gol -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol --mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset@gol +-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mkl -mwidekl @gol @@ -30408,6 +30408,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mavx512vnni @opindex mavx512vnni @need 200 +@itemx -mavxvnni +@opindex mavxvnni +@need 200 @itemx -mavx5124vnniw @opindex mavx5124vnniw @need 200 @@ -30442,9 +30445,9 @@ WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP, XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2, GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16, ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE, -UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL or CLDEMOTE extended -instruction sets. Each has a corresponding @option{-mno-} option to disable -use of these instructions. +UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI or CLDEMOTE +extended instruction sets. Each has a corresponding @option{-mno-} option to +disable use of these instructions. These extensions are also available as built-in functions: see @ref{x86 Built-in Functions}, for details of the functions enabled and diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index b3c5e530423..4822efe0a58 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2243,6 +2243,9 @@ Target supports compiling @code{avx2} instructions. @item avx2_runtime Target supports the execution of @code{avx2} instructions. +@item avxvnni +Target supports the execution of @code{avxvnni} instructions. + @item avx512f Target supports compiling @code{avx512f} instructions. diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index b964248fc0d..62b2132957a 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index 2f73de27c11..843aa2bdb2f 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/gcc.target/i386/avx-vnni-1.c b/gcc/testsuite/gcc.target/i386/avx-vnni-1.c new file mode 100644 index 00000000000..a22d12aa980 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vnni-1.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavxvnni -O2" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include <immintrin.h> + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +void extern +avxvnni_test (void) +{ + x = _mm256_dpbusd_epi32 (x, y, z); + x_ = _mm_dpbusd_epi32 (x_, y_, z_); + x = _mm256_dpbusds_epi32 (x, y, z); + x_ = _mm_dpbusds_epi32 (x_, y_, z_); + x = _mm256_dpwssd_epi32 (x, y, z); + x_ = _mm_dpwssd_epi32 (x_, y_, z_); + x = _mm256_dpwssds_epi32 (x, y, z); + x_ = _mm_dpwssds_epi32 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vnni-2.c b/gcc/testsuite/gcc.target/i386/avx-vnni-2.c new file mode 100644 index 00000000000..4ab6f0c8c1e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vnni-2.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include <immintrin.h> + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +__attribute__((target("avxvnni"))) +void +avxvnni_test (void) +{ + x = _mm256_dpbusd_epi32 (x, y, z); + x_ = _mm_dpbusd_epi32 (x_, y_, z_); + x = _mm256_dpbusds_epi32 (x, y, z); + x_ = _mm_dpbusds_epi32 (x_, y_, z_); + x = _mm256_dpwssd_epi32 (x, y, z); + x_ = _mm_dpwssd_epi32 (x_, y_, z_); + x = _mm256_dpwssds_epi32 (x, y, z); + x_ = _mm_dpwssds_epi32 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vnni-3.c b/gcc/testsuite/gcc.target/i386/avx-vnni-3.c new file mode 100644 index 00000000000..fdea7f95808 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vnni-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64" } */ + +__attribute__ ((__gnu_inline__, __always_inline__, target("avxvnni"))) +inline int +foo (void) /* { dg-error "inlining failed in call to 'always_inline' .* target specific option mismatch" } */ +{ + return 0; +} + +__attribute__ ((target("avx512vnni,avx512vl"))) +int +bar (void) +{ + return foo (); /* { dg-message "called from here" } */ +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vnni-4.c b/gcc/testsuite/gcc.target/i386/avx-vnni-4.c new file mode 100644 index 00000000000..1ef3edc140e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vnni-4.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64" } */ + +__attribute__ ((__gnu_inline__, __always_inline__, target("avx512vnni,avx512vl"))) +inline int +foo (void) /* { dg-error "inlining failed in call to 'always_inline' .* target specific option mismatch" } */ +{ + return 0; +} + +__attribute__ ((target("avxvnni"))) +int +bar (void) +{ + return foo (); /* { dg-message "called from here" } */ +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vnni-5.c b/gcc/testsuite/gcc.target/i386/avx-vnni-5.c new file mode 100644 index 00000000000..6556a323b1d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vnni-5.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavxvnni -mavx512vnni -mavx512vl" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include <immintrin.h> + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +void +avxvnni_test (void) +{ + x = _mm256_dpbusd_epi32 (x, y, z); + x_ = _mm_dpbusd_epi32 (x_, y_, z_); + x = _mm256_dpbusds_epi32 (x, y, z); + x_ = _mm_dpbusds_epi32 (x_, y_, z_); + x = _mm256_dpwssd_epi32 (x, y, z); + x_ = _mm_dpwssd_epi32 (x_, y_, z_); + x = _mm256_dpwssds_epi32 (x, y, z); + x_ = _mm_dpwssds_epi32 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vnni-6.c b/gcc/testsuite/gcc.target/i386/avx-vnni-6.c new file mode 100644 index 00000000000..2c4262788a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vnni-6.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavxvnni -O2" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include <immintrin.h> + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +void extern +avxvnni_test (void) +{ + x = _mm256_dpbusd_avx_epi32 (x, y, z); + x_ = _mm_dpbusd_avx_epi32 (x_, y_, z_); + x = _mm256_dpbusds_avx_epi32 (x, y, z); + x_ = _mm_dpbusds_avx_epi32 (x_, y_, z_); + x = _mm256_dpwssd_avx_epi32 (x, y, z); + x_ = _mm_dpwssd_avx_epi32 (x_, y_, z_); + x = _mm256_dpwssds_avx_epi32 (x, y, z); + x_ = _mm_dpwssds_avx_epi32 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vpdpbusd-2.c b/gcc/testsuite/gcc.target/i386/avx-vpdpbusd-2.c new file mode 100644 index 00000000000..b043bbff797 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vpdpbusd-2.c @@ -0,0 +1,71 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavxvnni" } */ +/* { dg-require-effective-target avxvnni } */ + +#ifndef CHECK +#define CHECK "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK + +static void +CALC (int *r, int *dst, unsigned char *s1, char *s2, int size) +{ + short tempres[32]; + for (int i = 0; i < size; i++) { + tempres[i] = ((unsigned short)(s1[i]) * (short)(s2[i])); + } + for (int i = 0; i < size / 4; i++) { + long long test = (long long)dst[i] + tempres[i*4] + tempres[i*4 + 1] + tempres[i*4 + 2] + tempres[i*4 + 3]; + r[i] = test; + } +} + +void +TEST (void) +{ + int i; + union256i_d res_256; + union256i_b src2_256; + union256i_ub src1_256; + int res_ref_256[8]; + + for (i = 0; i < 32; i++) + { + int sign = i % 2 ? 1 : -1; + src1_256.a[i] = 10 + 3*i + sign; + src2_256.a[i] = sign*10*i*i; + } + + for (i = 0; i < 8; i++) + res_256.a[i] = 0x7fffffff; + + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32); + res_256.x = _mm256_dpbusd_avx_epi32 (res_256.x, src1_256.x, src2_256.x); + if (check_union256i_d (res_256, res_ref_256)) + abort (); + + union128i_d res_128; + union128i_b src2_128; + union128i_ub src1_128; + int res_ref_128[4]; + + for (i = 0; i < 16; i++) + { + int sign = i % 2 ? 1 : -1; + src1_128.a[i] = 10 + 3*i*i + sign; + src2_128.a[i] = sign*10*i*i; + } + + for (i = 0; i < 4; i++) + res_128.a[i] = 0x7fffffff; + + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16); + res_128.x = _mm_dpbusd_avx_epi32 (res_128.x, src1_128.x, src2_128.x); + if (check_union128i_d (res_128, res_ref_128)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vpdpbusds-2.c b/gcc/testsuite/gcc.target/i386/avx-vpdpbusds-2.c new file mode 100644 index 00000000000..5899ee43d62 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vpdpbusds-2.c @@ -0,0 +1,71 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavxvnni " } */ +/* { dg-require-effective-target avxvnni } */ + +#ifndef CHECK +#define CHECK "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK + +static void +CALC (int *r, int *dst, unsigned char *s1, char *s2, int size) +{ + short tempres[32]; + for (int i = 0; i < size; i++) { + tempres[i] = ((unsigned short)(s1[i]) * (short)(s2[i])); + } + for (int i = 0; i < size / 4; i++) { + long long test = (long long)dst[i] + tempres[i*4] + tempres[i*4 + 1] + tempres[i*4 + 2] + tempres[i*4 + 3]; + r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test; + } +} + +void +TEST (void) +{ + int i; + union256i_d res_256; + union256i_b src2_256; + union256i_ub src1_256; + int res_ref_256[8]; + + for (i = 0; i < 32; i++) + { + int sign = i % 2 ? 1 : -1; + src1_256.a[i] = 10 + 3*i*i + sign; + src2_256.a[i] = sign*10*i*i; + } + + for (i = 0; i < 8; i++) + res_256.a[i] = 0x7fffffff; + + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32); + res_256.x = _mm256_dpbusds_avx_epi32 (res_256.x, src1_256.x, src2_256.x); + if (check_union256i_d (res_256, res_ref_256)) + abort (); + + union128i_d res_128; + union128i_b src2_128; + union128i_ub src1_128; + int res_ref_128[4]; + + for (i = 0; i < 16; i++) + { + int sign = i % 2 ? 1 : -1; + src1_128.a[i] = 10 + 3*i*i + sign; + src2_128.a[i] = sign*10*i*i; + } + + for (i = 0; i < 4; i++) + res_128.a[i] = 0x7fffffff; + + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16); + res_128.x = _mm_dpbusds_avx_epi32 (res_128.x, src1_128.x, src2_128.x); + if (check_union128i_d (res_128, res_ref_128)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vpdpwssd-2.c b/gcc/testsuite/gcc.target/i386/avx-vpdpwssd-2.c new file mode 100644 index 00000000000..1677b793fa3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vpdpwssd-2.c @@ -0,0 +1,67 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavxvnni" } */ +/* { dg-require-effective-target avxvnni } */ + +#ifndef CHECK +#define CHECK "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK + +static void +CALC (int *r, int *dst, short *s1, short *s2, int size) +{ + short tempres[16]; + for (int i = 0; i < size; i++) { + tempres[i] = ((int)(s1[i]) * (int)(s2[i])); + } + for (int i = 0; i < size / 2; i++) { + long long test = (long long)dst[i] + tempres[i*2] + tempres[i*2 + 1]; + r[i] = test; + } +} + +void +TEST (void) +{ + int i; + union256i_d res_256; + union256i_w src1_256, src2_256; + int res_ref_256[8]; + + for (i = 0; i < 16; i++) + { + src1_256.a[i] = 1 + i; + src2_256.a[i] = 2 + 2*i + i * i; + } + + for (i = 0; i < 8; i++) + res_256.a[i] = 0x7fffffff; + + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 16); + res_256.x = _mm256_dpwssd_avx_epi32 (res_256.x, src1_256.x, src2_256.x); + if (check_union256i_d (res_256, res_ref_256)) + abort (); + + union128i_d res_128; + union128i_w src1_128, src2_128; + int res_ref_128[4]; + + for (i = 0; i < 8; i++) + { + src1_128.a[i] = 1 + i; + src2_128.a[i] = 2 + 2*i + i * i; + } + + for (i = 0; i < 4; i++) + res_128.a[i] = 0x7fffffff; + + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 8); + res_128.x = _mm_dpwssd_avx_epi32 (res_128.x, src1_128.x, src2_128.x); + if (check_union128i_d (res_128, res_ref_128)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vpdpwssds-2.c b/gcc/testsuite/gcc.target/i386/avx-vpdpwssds-2.c new file mode 100644 index 00000000000..d07567d524b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vpdpwssds-2.c @@ -0,0 +1,67 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavxvnni" } */ +/* { dg-require-effective-target avxvnni } */ + +#ifndef CHECK +#define CHECK "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK + +static void +CALC (int *r, int *dst, short *s1, short *s2, int size) +{ + short tempres[16]; + for (int i = 0; i < size; i++) { + tempres[i] = ((int)(s1[i]) * (int)(s2[i])); + } + for (int i = 0; i < size / 2; i++) { + long long test = (long long)dst[i] + tempres[i*2] + tempres[i*2 + 1]; + r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test; + } +} + +void +TEST (void) +{ + int i; + union256i_d res_256; + union256i_w src1_256, src2_256; + int res_ref_256[8]; + + for (i = 0; i < 16; i++) + { + src1_256.a[i] = 1 + i; + src2_256.a[i] = 2 + 2*i + i * i; + } + + for (i = 0; i < 8; i++) + res_256.a[i] = 0x7fffffff; + + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 16); + res_256.x = _mm256_dpwssds_avx_epi32 (res_256.x, src1_256.x, src2_256.x); + if (check_union256i_d (res_256, res_ref_256)) + abort (); + + union128i_d res_128; + union128i_w src1_128, src2_128; + int res_ref_128[4]; + + for (i = 0; i < 8; i++) + { + src1_128.a[i] = 1 + i; + src2_128.a[i] = 2 + 2*i + i * i; + } + + for (i = 0; i < 4; i++) + res_128.a[i] = 0x7fffffff; + + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 8); + res_128.x = _mm_dpwssds_avx_epi32 (res_128.x, src1_128.x, src2_128.x); + if (check_union128i_d (res_128, res_ref_128)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vnni-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-1a.c similarity index 100% rename from gcc/testsuite/gcc.target/i386/avx512vl-vnni-1.c rename to gcc/testsuite/gcc.target/i386/avx512vl-vnni-1a.c diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vnni-1b.c b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-1b.c new file mode 100644 index 00000000000..067e631c89a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-1b.c @@ -0,0 +1,69 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512vl -mavx512vnni -mavx512bw -mavxvnni -O2" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include <immintrin.h> + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; +volatile __mmask32 m; + +void extern +avx512f_test (void) +{ + x = _mm256_dpbusd_epi32 (x, y, z); + x = _mm256_mask_dpbusd_epi32 (x, m, y, z); + x = _mm256_maskz_dpbusd_epi32 (m, x, y, z); + + x_ = _mm_dpbusd_epi32 (x_, y_, z_); + x_ = _mm_mask_dpbusd_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpbusd_epi32 (m, x_, y_, z_); + + x = _mm256_dpbusds_epi32 (x, y, z); + x = _mm256_mask_dpbusds_epi32 (x, m, y, z); + x = _mm256_maskz_dpbusds_epi32 (m, x, y, z); + + x_ = _mm_dpbusds_epi32 (x_, y_, z_); + x_ = _mm_mask_dpbusds_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpbusds_epi32 (m, x_, y_, z_); + + x = _mm256_dpwssd_epi32 (x, y, z); + x = _mm256_mask_dpwssd_epi32 (x, m, y, z); + x = _mm256_maskz_dpwssd_epi32 (m, x, y, z); + + x_ = _mm_dpwssd_epi32 (x_, y_, z_); + x_ = _mm_mask_dpwssd_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpwssd_epi32 (m, x_, y_, z_); + + x = _mm256_dpwssds_epi32 (x, y, z); + x = _mm256_mask_dpwssds_epi32 (x, m, y, z); + x = _mm256_maskz_dpwssds_epi32 (m, x, y, z); + + x_ = _mm_dpwssds_epi32 (x_, y_, z_); + x_ = _mm_mask_dpwssds_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpwssds_epi32 (m, x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vnni-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-2.c new file mode 100644 index 00000000000..d4b46356d80 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-2.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include <immintrin.h> + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +__attribute__((target("avx512vnni,avx512vl"))) +void +avxvnni_test (void) +{ + x = _mm256_dpbusd_epi32 (x, y, z); + x_ = _mm_dpbusd_epi32 (x_, y_, z_); + x = _mm256_dpbusds_epi32 (x, y, z); + x_ = _mm_dpbusds_epi32 (x_, y_, z_); + x = _mm256_dpwssd_epi32 (x, y, z); + x_ = _mm_dpwssd_epi32 (x_, y_, z_); + x = _mm256_dpwssds_epi32 (x, y, z); + x_ = _mm_dpwssds_epi32 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vnni-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-3.c new file mode 100644 index 00000000000..15a95abe83a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-3.c @@ -0,0 +1,47 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavxvnni -mavx512vnni -mavx512vl" } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include <immintrin.h> + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +void +avxvnni_test (void) +{ + register __m256i a __asm ("xmm16"); + register __m128i a_ __asm ("xmm26"); + a = _mm256_dpbusd_epi32 (x, y, z); + asm volatile ("" : "+v" (a)); + x = a; + a_ = _mm_dpbusd_epi32 (x_, y_, z_); + asm volatile ("" : "+v" (a_)); + x_ = a_; + a = _mm256_dpbusds_epi32 (x, y, z); + asm volatile ("" : "+v" (a)); + x = a; + a_ = _mm_dpbusds_epi32 (x_, y_, z_); + asm volatile ("" : "+v" (a_)); + x_ = a_; + a = _mm256_dpwssd_epi32 (x, y, z); + asm volatile ("" : "+v" (a)); + x = a; + a_ = _mm_dpwssd_epi32 (x_, y_, z_); + asm volatile ("" : "+v" (a_)); + x_ = a_; + a = _mm256_dpwssds_epi32 (x, y, z); + asm volatile ("" : "+v" (a)); + x = a; + a_ = _mm_dpwssds_epi32 (x_, y_, z_); + asm volatile ("" : "+v" (a_)); + x_ = a_; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vnnivl-builtin.c b/gcc/testsuite/gcc.target/i386/avx512vnnivl-builtin.c new file mode 100644 index 00000000000..97aaba0c3ef --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vnnivl-builtin.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mno-avxvnni -mavx512vnni -mavx512vl" } */ +typedef int v8si __attribute__ ((vector_size (32))); +v8si +foo (v8si a, v8si b, v8si c) +{ + return __builtin_ia32_vpdpbusd_v8si (a, b, c); +} diff --git a/gcc/testsuite/gcc.target/i386/avxvnni-builtin.c b/gcc/testsuite/gcc.target/i386/avxvnni-builtin.c new file mode 100644 index 00000000000..893a62ae185 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avxvnni-builtin.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavxvnni -mno-avx512vnni" } */ +typedef int v8si __attribute__ ((vector_size (32))); +v8si +foo (v8si a, v8si b, v8si c) +{ + return __builtin_ia32_vpdpbusd_v8si (a, b, c); +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index b8e3b1f7dee..395a21c8668 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -78,6 +78,7 @@ extern void test_uintr (void) __attribute__((__target__("uintr"))); extern void test_hreset (void) __attribute__((__target__("hreset"))); extern void test_keylocker (void) __attribute__((__target__("kl"))); extern void test_widekl (void) __attribute__((__target__("widekl"))); +extern void test_avxvnni (void) __attribute__((__target__("avxvnni"))); extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps"))); @@ -157,6 +158,7 @@ extern void test_no_uintr (void) __attribute__((__target__("no-uintr"))); extern void test_no_hreset (void) __attribute__((__target__("no-hreset"))); extern void test_no_keylocker (void) __attribute__((__target__("no-kl"))); extern void test_no_widekl (void) __attribute__((__target__("no-widekl"))); +extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index f1e05e62d8a..375d4d1b4de 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -3,7 +3,7 @@ popcntintrin.h gfniintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 7f96331ffe9..7029771334b 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ /* { dg-add-options bind_pic_locally } */ #include <mm_malloc.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 27704c3e23b..4ce0ffffaf3 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ /* { dg-add-options bind_pic_locally } */ #include <mm_malloc.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 789c8bebbab..6e8b6f3fa1b 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -103,7 +103,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni") #endif /* Following intrinsics require immediate arguments. They @@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl") +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni") #endif #include <immintrin.h> test_1 (_cvtss_sh, unsigned short, float, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 3e5e3e98f24..7faa053ace8 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -708,6 +708,6 @@ #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni") #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/vnni_inline_error.c b/gcc/testsuite/gcc.target/i386/vnni_inline_error.c new file mode 100644 index 00000000000..eaed9842604 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vnni_inline_error.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx512vnni -mavx512vl -mno-popcnt" } */ + +inline int __attribute__ ((__gnu_inline__, __always_inline__, target("popcnt"))) +foo () /* { dg-error "inlining failed in call to 'always_inline' .* target specific option mismatch" } */ +{ + return 0; +} + +int bar() +{ + return foo (); /* { dg-message "called from here" } */ +} diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 60ebbb39f9d..ceee78c26a9 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -8461,6 +8461,18 @@ proc check_effective_target_avx2 { } { } "-O0 -mavx2" ] } +# Return 1 if avxvnni instructions can be compiled. +proc check_effective_target_avxvnni { } { + return [check_no_compiler_messages avxvnni object { + typedef int __v8si __attribute__ ((__vector_size__ (32))); + __v8si + _mm256_dpbusd_epi32 (__v8si __A, __v8si __B, __v8si __C) + { + return __builtin_ia32_vpdpbusd_v8si (__A, __B, __C); + } + } "-mavxvnni" ] +} + # Return 1 if sse instructions can be compiled. proc check_effective_target_sse { } { return [check_no_compiler_messages sse object { -- 2.25.1