[PATCH] i386: Add AVX2 pragma wrapper for AVX512DQVL intrins

2023-08-18 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch aims to fix PR111051 by making sure that AVX2
intrins are visible to AVX512/AVX10 intrins under any circumstances.

I will also apply the same fix on AVX512DQ scalar intrins.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

Thx,
Haochen

PR target/111051

gcc/ChangeLog:

* config/i386/avx512vldqintrin.h: Push AVX2 when AVX2 is
disabled.

gcc/testsuite/ChangeLog:

PR target/111051
* gcc.target/i386/pr111051-1.c: New test.
---
 gcc/config/i386/avx512vldqintrin.h | 11 +++
 gcc/testsuite/gcc.target/i386/pr111051-1.c | 11 +++
 2 files changed, 22 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr111051-1.c

diff --git a/gcc/config/i386/avx512vldqintrin.h 
b/gcc/config/i386/avx512vldqintrin.h
index 1fbf93a0b52..db900ebf467 100644
--- a/gcc/config/i386/avx512vldqintrin.h
+++ b/gcc/config/i386/avx512vldqintrin.h
@@ -28,6 +28,12 @@
 #ifndef _AVX512VLDQINTRIN_H_INCLUDED
 #define _AVX512VLDQINTRIN_H_INCLUDED
 
+#if !defined(__AVX2__)
+#pragma GCC push_options
+#pragma GCC target("avx2")
+#define __DISABLE_AVX2__
+#endif /* __AVX2__ */
+
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvttpd_epi64 (__m256d __A)
@@ -2002,4 +2008,9 @@ _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, 
__m128d __B,
 
 #endif
 
+#ifdef __DISABLE_AVX2__
+#undef __DISABLE_AVX2__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX2__ */
+
 #endif /* _AVX512VLDQINTRIN_H_INCLUDED */
diff --git a/gcc/testsuite/gcc.target/i386/pr111051-1.c 
b/gcc/testsuite/gcc.target/i386/pr111051-1.c
new file mode 100644
index 000..973007043cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr111051-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+
+#include <immintrin.h>
+
+#pragma GCC target("avx512vl,avx512dq")
+
+void foo (__m256i i)
+{
+  volatile __m256d v1 = _mm256_cvtepi64_pd (i);
+}
+
-- 
2.31.1



[PATCH 1/2] [PATCH 1/2] Support AVX10.1 for AVX512DQ intrins

2023-08-17 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* config.gcc: Add avx512dqavx10_1intrin.h.
* config/i386/avx512dqintrin.h: Move avx10_1 related intrins
to new intrin file.
* config/i386/i386-builtin.def (BDESC):
Add OPTION_MASK_ISA2_AVX10_1.
* config/i386/i386.md (x64_avx512dq): Rename to
x64_avx10_1_or_avx512dq. Add TARGET_AVX10_1.
(*movqi_internal): Add TARGET_AVX10_1.
* config/i386/immintrin.h: Add avx512dqavx10_1intrin.h.
* config/i386/sse.md (SWI1248_AVX512BWDQ): Add
TARGET_AVX10_1 and TARGET_AVX512F.
(SWI1248_AVX512BW): Ditto.
(SWI1248_AVX512BWDQ2): Ditto.
(kmov): Remove TARGET_AVX512F check.
(k): Remove TARGET_AVX512F check. Add
TARGET_AVX10_1.
(kandn): Ditto.
(kxnor): Ditto.
(knot): Ditto.
(kadd): Remove TARGET_AVX512F check.
(k): Ditto.
(ktest): Ditto.
(kortest): Ditto.
(reduces):
Add TARGET_AVX10_1.
(pinsr_evex_isa): Change avx512dq to avx10_1_or_avx512dq.
(*vec_extractv4si): Ditto.
(*vec_extractv4si_zext): Ditto.
(*vec_concatv2si_sse4_1): Ditto.
(*vec_extractv2di_1): Change x64_avx512dq to
x64_avx10_1_or_avx512dq.
(vec_concatv2di): Ditto.
(avx512dq_ranges):
Add TARGET_AVX10_1.
(avx512dq_vmfpclass): Ditto.
* config/i386/subst.md (mask_scalar): Ditto.
(round_saeonly_scalar): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/sse-26.c: Skip avx512dqavx10_1intrin.h.
---
 gcc/config.gcc  |   9 +-
 gcc/config/i386/avx512dqavx10_1intrin.h | 634 
 gcc/config/i386/avx512dqintrin.h| 602 --
 gcc/config/i386/i386-builtin.def|  50 +-
 gcc/config/i386/i386.md |   8 +-
 gcc/config/i386/immintrin.h |   2 +
 gcc/config/i386/sse.md  |  63 +--
 gcc/config/i386/subst.md|   4 +-
 gcc/testsuite/gcc.target/i386/sse-26.c  |   1 +
 9 files changed, 706 insertions(+), 667 deletions(-)
 create mode 100644 gcc/config/i386/avx512dqavx10_1intrin.h

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 415e0e1ebc5..9b1be5350cd 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -415,10 +415,11 @@ i[34567]86-*-* | x86_64-*-*)
   adxintrin.h fxsrintrin.h xsaveintrin.h xsaveoptintrin.h
   avx512cdintrin.h avx512erintrin.h avx512pfintrin.h
   shaintrin.h clflushoptintrin.h xsavecintrin.h
-  xsavesintrin.h avx512dqintrin.h avx512bwintrin.h
-  avx512vlintrin.h avx512vlbwintrin.h avx512vldqintrin.h
-  avx512ifmaintrin.h avx512ifmavlintrin.h 
avx512vbmiintrin.h
-  avx512vbmivlintrin.h avx5124fmapsintrin.h 
avx5124vnniwintrin.h
+  xsavesintrin.h avx512dqintrin.h avx512dqavx10_1intrin.h
+  avx512bwintrin.h avx512vlintrin.h avx512vlbwintrin.h
+  avx512vldqintrin.h avx512ifmaintrin.h 
avx512ifmavlintrin.h
+  avx512vbmiintrin.h avx512vbmivlintrin.h
+  avx5124fmapsintrin.h avx5124vnniwintrin.h
   avx512vpopcntdqintrin.h clwbintrin.h mwaitxintrin.h
   clzerointrin.h pkuintrin.h sgxintrin.h cetintrin.h
   gfniintrin.h cet.h avx512vbmi2intrin.h
diff --git a/gcc/config/i386/avx512dqavx10_1intrin.h 
b/gcc/config/i386/avx512dqavx10_1intrin.h
new file mode 100644
index 000..4621f24863b
--- /dev/null
+++ b/gcc/config/i386/avx512dqavx10_1intrin.h
@@ -0,0 +1,634 @@
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512dqavx10_1intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512DQAVX10_1INTRIN_H_INCLUDED
+#define _AVX512DQAVX10_1INTRIN_H_INCLUDED
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))

[PATCH 2/2] [PATCH 2/2] Support AVX10.1 for AVX512DQ intrins

2023-08-17 Thread Haochen Jiang via Gcc-patches
gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_1-kaddb-1.c: New test.
* gcc.target/i386/avx10_1-kaddw-1.c: Ditto.
* gcc.target/i386/avx10_1-kandb-1.c: Ditto.
* gcc.target/i386/avx10_1-kandnb-1.c: Ditto.
* gcc.target/i386/avx10_1-kmovb-1.c: Ditto.
* gcc.target/i386/avx10_1-kmovb-2.c: Ditto.
* gcc.target/i386/avx10_1-kmovb-3.c: Ditto.
* gcc.target/i386/avx10_1-kmovb-4.c: Ditto.
* gcc.target/i386/avx10_1-knotb-1.c: Ditto.
* gcc.target/i386/avx10_1-korb-1.c: Ditto.
* gcc.target/i386/avx10_1-kortestb-1.c: Ditto.
* gcc.target/i386/avx10_1-kshiftlb-1.c: Ditto.
* gcc.target/i386/avx10_1-kshiftrb-1.c: Ditto.
* gcc.target/i386/avx10_1-ktestb-1.c: Ditto.
* gcc.target/i386/avx10_1-ktestw-1.c: Ditto.
* gcc.target/i386/avx10_1-kxnorb-1.c: Ditto.
* gcc.target/i386/avx10_1-kxorb-1.c: Ditto.
* gcc.target/i386/avx10_1-vfpclasssd-1.c: New test.
* gcc.target/i386/avx10_1-vfpcla-1.c: Ditto.
* gcc.target/i386/avx10_1-vpextr-1.c: Ditto.
* gcc.target/i386/avx10_1-vpinsr-1.c: Ditto.
* gcc.target/i386/avx10_1-vrangesd-1.c: Ditto.
* gcc.target/i386/avx10_1-vrangess-1.c: Ditto.
* gcc.target/i386/avx10_1-vreducesd-1.c: Ditto.
* gcc.target/i386/avx10_1-vreducess-1.c: Ditto.
---
 .../gcc.target/i386/avx10_1-kaddb-1.c | 12 +
 .../gcc.target/i386/avx10_1-kaddw-1.c | 12 +
 .../gcc.target/i386/avx10_1-kandb-1.c | 16 ++
 .../gcc.target/i386/avx10_1-kandnb-1.c| 16 ++
 .../gcc.target/i386/avx10_1-kmovb-1.c | 15 ++
 .../gcc.target/i386/avx10_1-kmovb-2.c | 16 ++
 .../gcc.target/i386/avx10_1-kmovb-3.c | 17 ++
 .../gcc.target/i386/avx10_1-kmovb-4.c | 15 ++
 .../gcc.target/i386/avx10_1-knotb-1.c | 15 ++
 .../gcc.target/i386/avx10_1-korb-1.c  | 16 ++
 .../gcc.target/i386/avx10_1-kortestb-1.c  | 16 ++
 .../gcc.target/i386/avx10_1-kshiftlb-1.c  | 16 ++
 .../gcc.target/i386/avx10_1-kshiftrb-1.c  | 16 ++
 .../gcc.target/i386/avx10_1-ktestb-1.c| 16 ++
 .../gcc.target/i386/avx10_1-ktestw-1.c| 16 ++
 .../gcc.target/i386/avx10_1-kxnorb-1.c| 16 ++
 .../gcc.target/i386/avx10_1-kxorb-1.c | 16 ++
 .../gcc.target/i386/avx10_1-vfpclasssd-1.c| 16 ++
 .../gcc.target/i386/avx10_1-vfpcla-1.c| 16 ++
 .../gcc.target/i386/avx10_1-vpextr-1.c| 53 +++
 .../gcc.target/i386/avx10_1-vpinsr-1.c| 33 
 .../gcc.target/i386/avx10_1-vrangesd-1.c  | 26 +
 .../gcc.target/i386/avx10_1-vrangess-1.c  | 25 +
 .../gcc.target/i386/avx10_1-vreducesd-1.c | 31 +++
 .../gcc.target/i386/avx10_1-vreducess-1.c | 30 +++
 25 files changed, 492 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-kaddb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-kaddw-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-kandb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-kandnb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-kmovb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-kmovb-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-kmovb-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-kmovb-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-knotb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-korb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-kortestb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-kshiftlb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-kshiftrb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-ktestb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-ktestw-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-kxnorb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-kxorb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vfpclasssd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vfpcla-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vpextr-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vpinsr-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vrangesd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vrangess-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vreducesd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vreducess-1.c

diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-kaddb-1.c 
b/gcc/testsuite/gcc.target/i386/avx10_1-kaddb-1.c
new file mode 100644
index 000..6da7b497722
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_1-kaddb-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.1 -O2" } */
+/* { dg-final { 

[PATCH 0/2] Support AVX10.1 for AVX512DQ intrins

2023-08-17 Thread Haochen Jiang via Gcc-patches
Hi all,

I have just checked in the first nine patches for AVX10.1, after
waiting one day since Hongtao said ok.

These two patches aim to add AVX512DQ scalar intrins to AVX10.1.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

Also, we propose to commit the patches step by step over the following
weeks to make sure that there are no unexpected regressions on trunk.

The schedule is as follows:

This week: AVX512DQ 128/256/scalar intrins

Week 8/21-8/27: AVX512BW 128/256/scalar intrins

Week 8/28-9/3: AVX512VL, AVX512F, AVX512BF16, AVX512VBMI, AVX512VNNI,
AVX512IFMA, AVX512VBMI2, AVX512BITALG, AVX512VPOPCNTDQ 128/256/scalar
intrins

Week 9/4-9/10: AVX512FP16 128/256/scalar intrins and GFNI, VPCLMULQDQ,
VAES AVX10 check

Week 9/11-9/17: AVX512 optimization migration to AVX10.1

Thx,
Haochen





[PATCH 6/6] Support AVX10.1 for AVX512DQ+AVX512VL intrins

2023-08-08 Thread Haochen Jiang via Gcc-patches
gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_1-vextractf64x2-1.c: New test.
* gcc.target/i386/avx10_1-vextracti64x2-1.c: Ditto.
* gcc.target/i386/avx10_1-vfpclasspd-1.c: Ditto.
* gcc.target/i386/avx10_1-vfpclassps-1.c: Ditto.
* gcc.target/i386/avx10_1-vinsertf64x2-1.c: Ditto.
* gcc.target/i386/avx10_1-vinserti64x2-1.c: Ditto.
* gcc.target/i386/avx10_1-vrangepd-1.c: Ditto.
* gcc.target/i386/avx10_1-vrangeps-1.c: Ditto.
* gcc.target/i386/avx10_1-vreducepd-1.c: Ditto.
* gcc.target/i386/avx10_1-vreduceps-1.c: Ditto.
---
 .../gcc.target/i386/avx10_1-vextractf64x2-1.c | 18 
 .../gcc.target/i386/avx10_1-vextracti64x2-1.c | 19 
 .../gcc.target/i386/avx10_1-vfpclasspd-1.c| 21 ++
 .../gcc.target/i386/avx10_1-vfpclassps-1.c| 21 ++
 .../gcc.target/i386/avx10_1-vinsertf64x2-1.c  | 18 
 .../gcc.target/i386/avx10_1-vinserti64x2-1.c  | 18 
 .../gcc.target/i386/avx10_1-vrangepd-1.c  | 27 +
 .../gcc.target/i386/avx10_1-vrangeps-1.c  | 27 +
 .../gcc.target/i386/avx10_1-vreducepd-1.c | 29 +++
 .../gcc.target/i386/avx10_1-vreduceps-1.c | 29 +++
 10 files changed, 227 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vextractf64x2-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vextracti64x2-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vfpclasspd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vfpclassps-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vinsertf64x2-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vinserti64x2-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vrangepd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vrangeps-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vreducepd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vreduceps-1.c

diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-vextractf64x2-1.c 
b/gcc/testsuite/gcc.target/i386/avx10_1-vextractf64x2-1.c
new file mode 100644
index 000..4c7e54dc198
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_1-vextractf64x2-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.1 -O2" } */
+/* { dg-final { scan-assembler-times "vextractf64x2\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+.{7}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vextractf64x2\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+.{7}\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vextractf64x2\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+.{7}\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x;
+volatile __m128d y;
+
+void extern
+avx10_1_test (void)
+{
+  y = _mm256_extractf64x2_pd (x, 1);
+  y = _mm256_mask_extractf64x2_pd (y, 2, x, 1);
+  y = _mm256_maskz_extractf64x2_pd (2, x, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-vextracti64x2-1.c 
b/gcc/testsuite/gcc.target/i386/avx10_1-vextracti64x2-1.c
new file mode 100644
index 000..c0bd7700d52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_1-vextracti64x2-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.1 -O2" } */
+/* { dg-final { scan-assembler-times "vextracti64x2\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+.{7}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vextracti64x2\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+.{7}\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vextracti64x2\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+.{7}\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+
+void extern
+avx10_1_test (void)
+{
+  y = _mm256_extracti64x2_epi64 (x, 1);
+  y = _mm256_mask_extracti64x2_epi64 (y, 2, x, 1);
+  y = _mm256_maskz_extracti64x2_epi64 (2, x, 1);
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-vfpclasspd-1.c 
b/gcc/testsuite/gcc.target/i386/avx10_1-vfpclasspd-1.c
new file mode 100644
index 000..806ba800023
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_1-vfpclasspd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.1 -O2" } */
+/* { dg-final { scan-assembler-times "vfpclasspdy\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfpclasspdx\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfpclasspdy\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 
1 } } */
+/* { dg-final { scan-assembler-times "vfpclasspdx\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 
1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x256;
+volatile __m128d x128;
+volatile __mmask8 m;
+
+void extern
+avx10_1_test (void)
+{
+  m = _mm256_fpclass_pd_mask 

[PATCH 4/6] Support AVX10.1 for AVX512DQ+AVX512VL intrins

2023-08-08 Thread Haochen Jiang via Gcc-patches
gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_1-abs-copysign-1.c: New test.
* gcc.target/i386/avx10_1-vandpd-1.c: Ditto.
* gcc.target/i386/avx10_1-vandps-1.c: Ditto.
* gcc.target/i386/avx10_1-vcvtps2qq-1.c: Ditto.
* gcc.target/i386/avx10_1-vcvtps2uqq-1.c: Ditto.
* gcc.target/i386/avx10_1-vcvtqq2pd-1.c: Ditto.
* gcc.target/i386/avx10_1-vcvtqq2ps-1.c: Ditto.
* gcc.target/i386/avx10_1-vcvtuqq2pd-1.c: Ditto.
* gcc.target/i386/avx10_1-vcvtuqq2ps-1.c: Ditto.
* gcc.target/i386/avx10_1-vorpd-1.c: Ditto.
* gcc.target/i386/avx10_1-vorps-1.c: Ditto.
* gcc.target/i386/avx10_1-vpmovd2m-1.c: Ditto.
* gcc.target/i386/avx10_1-vpmovm2d-1.c: Ditto.
* gcc.target/i386/avx10_1-vpmovm2q-1.c: Ditto.
* gcc.target/i386/avx10_1-vpmovq2m-1.c: Ditto.
* gcc.target/i386/avx10_1-vxorpd-1.c: Ditto.
* gcc.target/i386/avx10_1-vxorps-1.c: Ditto.
---
 .../gcc.target/i386/avx10_1-abs-copysign-1.c  | 69 +++
 .../gcc.target/i386/avx10_1-vandpd-1.c| 21 ++
 .../gcc.target/i386/avx10_1-vandps-1.c| 21 ++
 .../gcc.target/i386/avx10_1-vcvtps2qq-1.c | 28 
 .../gcc.target/i386/avx10_1-vcvtps2uqq-1.c| 27 
 .../gcc.target/i386/avx10_1-vcvtqq2pd-1.c | 27 
 .../gcc.target/i386/avx10_1-vcvtqq2ps-1.c | 26 +++
 .../gcc.target/i386/avx10_1-vcvtuqq2pd-1.c| 27 
 .../gcc.target/i386/avx10_1-vcvtuqq2ps-1.c| 27 
 .../gcc.target/i386/avx10_1-vorpd-1.c | 22 ++
 .../gcc.target/i386/avx10_1-vorps-1.c | 22 ++
 .../gcc.target/i386/avx10_1-vpmovd2m-1.c  | 17 +
 .../gcc.target/i386/avx10_1-vpmovm2d-1.c  | 17 +
 .../gcc.target/i386/avx10_1-vpmovm2q-1.c  | 17 +
 .../gcc.target/i386/avx10_1-vpmovq2m-1.c  | 17 +
 .../gcc.target/i386/avx10_1-vxorpd-1.c| 23 +++
 .../gcc.target/i386/avx10_1-vxorps-1.c| 22 ++
 17 files changed, 430 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-abs-copysign-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vandpd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vandps-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vcvtps2qq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vcvtps2uqq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vcvtqq2pd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vcvtqq2ps-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vcvtuqq2pd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vcvtuqq2ps-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vorpd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vorps-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vpmovd2m-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vpmovm2d-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vpmovm2q-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vpmovq2m-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vxorpd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vxorps-1.c

diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-abs-copysign-1.c 
b/gcc/testsuite/gcc.target/i386/avx10_1-abs-copysign-1.c
new file mode 100644
index 000..e9e45e44051
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_1-abs-copysign-1.c
@@ -0,0 +1,69 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-Ofast -mavx10.1" } */
+
+void
+f1 (float x)
+{
+  register float a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = __builtin_fabsf (a);
+  asm volatile ("" : "+v" (a));
+}
+/*
+void
+f2 (float x, float y)
+{
+  register float a __asm ("xmm16"), b __asm ("xmm17");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  a = __builtin_copysignf (a, b);
+  asm volatile ("" : "+v" (a));
+}
+*/
+void
+f3 (float x)
+{
+  register float a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = -a;
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f4 (double x)
+{
+  register double a __asm ("xmm18");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = __builtin_fabs (a);
+  asm volatile ("" : "+v" (a));
+}
+/*
+void
+f5 (double x, double y)
+{
+  register double a __asm ("xmm18"), b __asm ("xmm19");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  a = __builtin_copysign (a, b);
+  asm volatile ("" : "+v" (a));
+}
+*/
+void
+f6 (double x)
+{
+  register double a __asm ("xmm18");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = -a;
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vandps\[^\n\r\]*xmm16" } } */
+/* { dg-final { scan-assembler "vxorps\[^\n\r\]*xmm16" } } */
+/* { dg-final { scan-assembler "vandpd\[^\n\r\]*xmm18" } } */
+/* { dg-final { scan-assembler "vxorpd\[^\n\r\]*xmm18" } } */
diff --git 

[PATCH 5/6] Support AVX10.1 for AVX512DQ+AVX512VL intrins

2023-08-08 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* config/i386/avx512vldqintrin.h: Remove target attribute.
* config/i386/i386-builtin.def (BDESC):
Add OPTION_MASK_ISA2_AVX10_1.
* config/i386/sse.md (VF_AVX512VLDQ_AVX10_1): New.
(VFH_AVX512VLDQ_AVX10_1): Ditto.
(VF1_AVX512VLDQ_AVX10_1): Ditto.
(reducep):
Change iterator to VFH_AVX512VLDQ_AVX10_1. Remove target check.
(vec_pack_float_): Change iterator to
VI8_AVX512VLDQ_AVX10_1. Remove target check.
(vec_unpack_fix_trunc_lo_): Change iterator to
VF1_AVX512VLDQ_AVX10_1. Remove target check.
(vec_unpack_fix_trunc_hi_): Ditto.
(VI48F_256_DQVL_AVX10_1): Rename from VI48F_256_DQ.
(avx512vl_vextractf128): Change iterator to
VI48F_256_DQVL_AVX10_1. Remove target check.
(vec_extract_hi__mask): Add TARGET_AVX10_1.
(vec_extract_hi_): Ditto.
(avx512vl_vinsert): Ditto.
(vec_set_lo_): Ditto.
(vec_set_hi_): Ditto.
(avx512dq_rangep): Change
iterator to VF_AVX512VLDQ_AVX10_1. Remove target check.
(avx512dq_fpclass): Change
iterator to VFH_AVX512VLDQ_AVX10_1. Remove target check.
* config/i386/subst.md (mask_avx512dq_condition): Add
TARGET_AVX10_1.
(mask_scalar_merge): Ditto.
---
 gcc/config/i386/avx512vldqintrin.h | 11 
 gcc/config/i386/i386-builtin.def   | 32 +-
 gcc/config/i386/sse.md | 94 ++
 gcc/config/i386/subst.md   |  4 +-
 4 files changed, 76 insertions(+), 65 deletions(-)

diff --git a/gcc/config/i386/avx512vldqintrin.h 
b/gcc/config/i386/avx512vldqintrin.h
index a8d14a4efc9..1fbf93a0b52 100644
--- a/gcc/config/i386/avx512vldqintrin.h
+++ b/gcc/config/i386/avx512vldqintrin.h
@@ -1331,12 +1331,6 @@ _mm256_movepi64_mask (__m256i __A)
   return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
 }
 
-#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
-#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512dq")
-#define __DISABLE_AVX512VLDQ__
-#endif /* __AVX512VLDQ__ */
-
 #ifdef __OPTIMIZE__
 extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -2008,9 +2002,4 @@ _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, 
__m128d __B,
 
 #endif
 
-#ifdef __DISABLE_AVX512VLDQ__
-#undef __DISABLE_AVX512VLDQ__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512VLDQ__ */
-
 #endif /* _AVX512VLDQINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index aa0a29caa9f..34768552e78 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1782,8 +1782,8 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__b
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vec_dupv8sf_mask, 
"__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, 
(int) V8SF_FTYPE_V4SF_V8SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vec_dupv4sf_mask, 
"__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, 
(int) V4SF_FTYPE_V4SF_V4SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vec_dupv4df_mask, 
"__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, 
(int) V4DF_FTYPE_V2DF_V4DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", 
IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", 
IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 
OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512vl_vextractf128v4df, 
"__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, 
(int) V2DF_FTYPE_V4DF_INT_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 
OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512vl_vextractf128v4di, 
"__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, 
(int) V2DI_FTYPE_V4DI_INT_V2DI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vinsertv8sf, 
"__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, 
(int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vinsertv8si, 
"__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, 
(int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", 
IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI)
@@ -1810,10 +1810,10 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__
 BDESC (OPTION_MASK_ISA_AVX512VL, 

[PATCH 3/6] Support AVX10.1 for AVX512DQ+AVX512VL intrins

2023-08-08 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* config/i386/avx512vldqintrin.h: Remove target attribute.
* config/i386/i386-builtin.def (BDESC):
Add OPTION_MASK_ISA2_AVX10_1.
* config/i386/i386.cc (standard_sse_constant_opcode): Add 
TARGET_AVX10_1.
* config/i386/i386.md: Add new isa attribute
avx10_1_or_avx512vl.
* config/i386/sse.md: (VI48_AVX512VL_AVX10_1): New.
(VI48_AVX512VLDQ_AVX10_1): Ditto.
(VF2_AVX512VL): Remove.
(VI8_256_512VLDQ_AVX10_1): Rename from VI8_256_512.
Add TARGET_AVX10_1.
(*3): Change isa attribute to
avx10_1_or_avx512dq. Add TARGET_AVX10_1.
(3): Add TARGET_AVX10_1. Change isa attr
to avx10_1_or_avx512vl.
(avx512dq_cvtps2qq):
Change iterator to VI8_256_512VLDQ_AVX10_1. Remove target check.
(avx512dq_cvtps2qqv2di):
Add TARGET_AVX10_1.
(avx512dq_cvtps2uqq):
Change iterator to VI8_256_512VLDQ_AVX10_1. Remove target check.
(avx512dq_cvtps2uqqv2di):
Add TARGET_AVX10_1.

(float2):
Change iterator to VF2_AVX512VLDQ_AVX10_1. Remove target check.

(float2):
Change iterator to VF1_128_256VLDQ_AVX10_1. Remove target check.
(floatv4div4sf2):
Add TARGET_AVX10_1.
(avx512dq_floatv2div2sf2): Ditto.
(*avx512dq_floatv2div2sf2): Ditto.
(floatv2div2sf2): Ditto.
(floatv2div2sf2_mask): Ditto.
(*floatv2div2sf2_mask): Ditto.
(*floatv2div2sf2_mask_1): Ditto.
(_cvt2mask):
Change iterator to VI48_AVX512VLDQ_AVX10_1. Remove target check.
(_cvtmask2): Ditto.
(*_cvtmask2):
Change iterator to VI48_AVX512VL_AVX10_1. Remove target check.
Change when constraint is enabled.
---
 gcc/config/i386/avx512vldqintrin.h |  12 +--
 gcc/config/i386/i386-builtin.def   |  64 
 gcc/config/i386/i386.cc|   8 +-
 gcc/config/i386/sse.md | 114 +
 4 files changed, 109 insertions(+), 89 deletions(-)

diff --git a/gcc/config/i386/avx512vldqintrin.h 
b/gcc/config/i386/avx512vldqintrin.h
index 4b8006f7b73..a8d14a4efc9 100644
--- a/gcc/config/i386/avx512vldqintrin.h
+++ b/gcc/config/i386/avx512vldqintrin.h
@@ -673,12 +673,6 @@ _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
 (__mmask8) __U);
 }
 
-#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
-#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512dq")
-#define __DISABLE_AVX512VLDQ__
-#endif /* __AVX512VLDQ__ */
-
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtps_epi64 (__m128 __A)
@@ -1337,6 +1331,12 @@ _mm256_movepi64_mask (__m256i __A)
   return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
 }
 
+#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
+#pragma GCC push_options
+#pragma GCC target("avx512vl,avx512dq")
+#define __DISABLE_AVX512VLDQ__
+#endif /* __AVX512VLDQ__ */
+
 #ifdef __OPTIMIZE__
 extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 18d8966f0de..aa0a29caa9f 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1710,14 +1710,14 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_subv2df3_mask, "__builtin_ia32_subp
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_subv4df3_mask, 
"__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) 
V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_subv4sf3_mask, 
"__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) 
V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_subv8sf3_mask, 
"__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) 
V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", 
IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", 
IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", 
IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", 
IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", 
IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ | 

[PATCH 2/6] Support AVX10.1 for AVX512DQ+AVX512VL intrins

2023-08-08 Thread Haochen Jiang via Gcc-patches
gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_1-vandnpd-1.c: New test.
* gcc.target/i386/avx10_1-vandnps-1.c: Ditto.
* gcc.target/i386/avx10_1-vbroadcastf32x2-1.c: Ditto.
* gcc.target/i386/avx10_1-vbroadcastf64x2-1.c: Ditto.
* gcc.target/i386/avx10_1-vbroadcasti32x2-1.c: Ditto.
* gcc.target/i386/avx10_1-vbroadcasti64x2-1.c: Ditto.
* gcc.target/i386/avx10_1-vcvtpd2qq-1.c: Ditto.
* gcc.target/i386/avx10_1-vcvtpd2uqq-1.c: Ditto.
* gcc.target/i386/avx10_1-vcvttpd2qq-1.c: Ditto.
* gcc.target/i386/avx10_1-vcvttpd2uqq-1.c: Ditto.
* gcc.target/i386/avx10_1-vcvttps2qq-1.c: Ditto.
* gcc.target/i386/avx10_1-vcvttps2uqq-1.c: Ditto.
* gcc.target/i386/avx10_1-vpmullq-1.c: Ditto.
---
 .../gcc.target/i386/avx10_1-vandnpd-1.c   | 21 +
 .../gcc.target/i386/avx10_1-vandnps-1.c   | 21 +
 .../i386/avx10_1-vbroadcastf32x2-1.c  | 19 
 .../i386/avx10_1-vbroadcastf64x2-1.c  | 19 
 .../i386/avx10_1-vbroadcasti32x2-1.c  | 25 
 .../i386/avx10_1-vbroadcasti64x2-1.c  | 19 
 .../gcc.target/i386/avx10_1-vcvtpd2qq-1.c | 29 ++
 .../gcc.target/i386/avx10_1-vcvtpd2uqq-1.c| 29 ++
 .../gcc.target/i386/avx10_1-vcvttpd2qq-1.c| 30 +++
 .../gcc.target/i386/avx10_1-vcvttpd2uqq-1.c   | 29 ++
 .../gcc.target/i386/avx10_1-vcvttps2qq-1.c| 27 +
 .../gcc.target/i386/avx10_1-vcvttps2uqq-1.c   | 26 
 .../gcc.target/i386/avx10_1-vpmullq-1.c   | 24 +++
 13 files changed, 318 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vandnpd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vandnps-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vbroadcastf32x2-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vbroadcastf64x2-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vbroadcasti32x2-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vbroadcasti64x2-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vcvtpd2qq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vcvtpd2uqq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vcvttpd2qq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vcvttpd2uqq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vcvttps2qq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vcvttps2uqq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-vpmullq-1.c

diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-vandnpd-1.c 
b/gcc/testsuite/gcc.target/i386/avx10_1-vandnpd-1.c
new file mode 100644
index 000..a9a8bd7ca8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_1-vandnpd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.1 -O2" } */
+/* { dg-final { scan-assembler-times "vandnpd\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vandnpd\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vandnpd\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vandnpd\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include 
+
+volatile __m256d y;
+volatile __m128d x;
+volatile __mmask8 m;
+
+void extern
+avx10_1_test (void)
+{
+  y = _mm256_mask_andnot_pd (y, m, y, y);
+  y = _mm256_maskz_andnot_pd (m, y, y);
+  x = _mm_mask_andnot_pd (x, m, x, x);
+  x = _mm_maskz_andnot_pd (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-vandnps-1.c 
b/gcc/testsuite/gcc.target/i386/avx10_1-vandnps-1.c
new file mode 100644
index 000..c33141021cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_1-vandnps-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.1 -O2" } */
+/* { dg-final { scan-assembler-times "vandnps\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vandnps\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vandnps\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vandnps\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include 
+
+volatile __m256 y;
+volatile __m128 x;
+volatile __mmask8 m;
+
+void extern
+avx10_1_test (void)
+{
+  y = _mm256_mask_andnot_ps (y, m, y, y);
+  y = _mm256_maskz_andnot_ps (m, y, y);
+  x = _mm_mask_andnot_ps (x, m, x, x);
+  x = _mm_maskz_andnot_ps (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-vbroadcastf32x2-1.c 
b/gcc/testsuite/gcc.target/i386/avx10_1-vbroadcastf32x2-1.c
new file mode 100644

[PATCH 1/6] Support AVX10.1 for AVX512DQ+AVX512VL intrins

2023-08-08 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* config/i386/avx512vldqintrin.h: Remove target attribute.
* config/i386/i386-builtin.def (BDESC):
Add OPTION_MASK_ISA2_AVX10_1.
* config/i386/i386-builtins.cc (def_builtin): Handle AVX10_1.
* config/i386/i386-expand.cc
(ix86_check_builtin_isa_match): Ditto.
(ix86_expand_sse2_mulvxdi3): Add TARGET_AVX10_1.
* config/i386/i386.md: Add new isa attribute avx10_1_or_avx512dq
and avx10_1_or_avx512vl.
* config/i386/sse.md: (VF2_AVX512VLDQ_AVX10_1): New.
(VF1_128_256VLDQ_AVX10_1): Ditto.
(VI8_AVX512VLDQ_AVX10_1): Ditto.
(_andnot3):
Add TARGET_AVX10_1 and change isa attr from avx512dq to
avx10_1_or_avx512dq.
(*andnot3): Add TARGET_AVX10_1 and change isa attr from
avx512vl to avx10_1_or_avx512vl.

(fix_trunc2):
Change iterator to VF2_AVX512VLDQ_AVX10_1. Remove target check.
(fix_notrunc2):
Ditto.
(ufix_notrunc2):
Ditto.

(fix_trunc2):
Change iterator to VF1_128_256VLDQ_AVX10_1. Remove target check.
(avx512dq_fix_truncv2sfv2di2):
Add TARGET_AVX10_1.
(fix_truncv2sfv2di2): Ditto.
(cond_mul): Change iterator to VI8_AVX10_1_AVX512DQVL.
Remove target check.
(avx512dq_mul3): Ditto.
(*avx512dq_mul3): Ditto.
(VI4F_BRCST32x2): Add TARGET_AVX512DQ and TARGET_AVX10_1.
(avx512dq_broadcast):
Remove target check.
(VI8F_BRCST64x2): Add TARGET_AVX512DQ and TARGET_AVX10_1.
(avx512dq_broadcast_1):
Remove target check.
* config/i386/subst.md (mask_mode512bit_condition): Add TARGET_AVX10_1.
(mask_avx512vl_condition): Ditto.
(mask): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-1.c: Add -mavx10.1.
* gcc.target/i386/avx-2.c: Ditto.
* gcc.target/i386/sse-26.c: Skip AVX512VLDQ intrin file.
---
 gcc/config/i386/avx512vldqintrin.h | 12 ++--
 gcc/config/i386/i386-builtin.def   | 46 ++--
 gcc/config/i386/i386-builtins.cc   |  9 +--
 gcc/config/i386/i386-expand.cc |  8 ++-
 gcc/config/i386/i386.md|  7 +-
 gcc/config/i386/sse.md | 97 --
 gcc/config/i386/subst.md   |  7 +-
 gcc/testsuite/gcc.target/i386/avx-1.c  |  2 +-
 gcc/testsuite/gcc.target/i386/avx-2.c  |  2 +-
 gcc/testsuite/gcc.target/i386/sse-26.c |  6 ++
 10 files changed, 117 insertions(+), 79 deletions(-)

diff --git a/gcc/config/i386/avx512vldqintrin.h 
b/gcc/config/i386/avx512vldqintrin.h
index be4d59c34e4..4b8006f7b73 100644
--- a/gcc/config/i386/avx512vldqintrin.h
+++ b/gcc/config/i386/avx512vldqintrin.h
@@ -28,12 +28,6 @@
 #ifndef _AVX512VLDQINTRIN_H_INCLUDED
 #define _AVX512VLDQINTRIN_H_INCLUDED
 
-#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
-#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512dq")
-#define __DISABLE_AVX512VLDQ__
-#endif /* __AVX512VLDQ__ */
-
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvttpd_epi64 (__m256d __A)
@@ -679,6 +673,12 @@ _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
 (__mmask8) __U);
 }
 
+#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
+#pragma GCC push_options
+#pragma GCC target("avx512vl,avx512dq")
+#define __DISABLE_AVX512VLDQ__
+#endif /* __AVX512VLDQ__ */
+
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtps_epi64 (__m128 __A)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 8738b3b6a8a..18d8966f0de 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1718,31 +1718,31 @@ BDESC (OPTION_MASK_ISA_AVX512DQ | 
OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_iorv4df3
 BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", 
IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", 
IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", 
IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", 
IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", 
IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ | 

[PATCH 1/3] Initial support for AVX10.1

2023-08-08 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Add avx10_set and version and detect avx10.1.
(cpu_indicator_init): Handle avx10.1-512.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AVX10_512BIT_SET): New.
(OPTION_MASK_ISA2_AVX10_1_SET): Ditto.
(OPTION_MASK_ISA2_AVX10_512BIT_UNSET): Ditto.
(OPTION_MASK_ISA2_AVX10_1_UNSET): Ditto.
(OPTION_MASK_ISA2_AVX2_UNSET): Modify for AVX10_1.
(ix86_handle_option): Handle -mavx10.1, -mavx10.1-256 and
-mavx10.1-512.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_AVX10_512BIT, FEATURE_AVX10_1 and
FEATURE_AVX10_512BIT.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
AVX10_512BIT, AVX10_1 and AVX10_1_512.
* config/i386/constraints.md (Yk): Add AVX10_1.
(Yv): Ditto.
(k): Ditto.
* config/i386/cpuid.h (bit_AVX10): New.
(bit_AVX10_256): Ditto.
(bit_AVX10_512): Ditto.
* config/i386/i386-c.cc (ix86_target_macros_internal):
Define AVX10_512BIT and AVX10_1.
* config/i386/i386-isa.def
(AVX10_512BIT): Add DEF_PTA(AVX10_512BIT).
(AVX10_1): Add DEF_PTA(AVX10_1).
* config/i386/i386-options.cc (isa2_opts): Add -mavx10.1.
(ix86_valid_target_attribute_inner_p): Handle avx10-512bit, avx10.1
and avx10.1-512.
(ix86_option_override_internal): Enable AVX512{F,VL,BW,DQ,CD,BF16,
FP16,VBMI,VBMI2,VNNI,IFMA,BITALG,VPOPCNTDQ} features for avx10.1-512.
(ix86_valid_target_attribute_inner_p): Handle AVX10_1.
* config/i386/i386.cc (ix86_get_ssemov): Add AVX10_1.
(ix86_conditional_register_usage): Ditto.
(ix86_hard_regno_mode_ok): Ditto.
(ix86_rtx_costs): Ditto.
* config/i386/i386.h (VALID_MASK_AVX10_MODE): New macro.
* config/i386/i386.opt: Add option -mavx10.1, -mavx10.1-256 and
-mavx10.1-512.
* doc/extend.texi: Document avx10.1, avx10.1-256 and avx10.1-512.
* doc/invoke.texi: Document -mavx10.1, -mavx10.1-256 and -mavx10.1-512.
* doc/sourcebuild.texi: Document target avx10.1, avx10.1-256
and avx10.1-512.

gcc/testsuite/ChangeLog:

* g++.target/i386/mv33.C: New test.
* gcc.target/i386/avx10_1-1.c: Ditto.
* gcc.target/i386/avx10_1-2.c: Ditto.
* gcc.target/i386/avx10_1-3.c: Ditto.
* gcc.target/i386/avx10_1-4.c: Ditto.
* gcc.target/i386/avx10_1-5.c: Ditto.
* gcc.target/i386/avx10_1-6.c: Ditto.
* gcc.target/i386/avx10_1-7.c: Ditto.
* gcc.target/i386/avx10_1-8.c: Ditto.
* gcc.target/i386/avx10_1-9.c: Ditto.
* gcc.target/i386/avx10_1-10.c: Ditto.
---
 gcc/common/config/i386/cpuinfo.h   | 36 +++
 gcc/common/config/i386/i386-common.cc  | 53 +-
 gcc/common/config/i386/i386-cpuinfo.h  |  3 ++
 gcc/common/config/i386/i386-isas.h |  5 ++
 gcc/config/i386/constraints.md |  6 +--
 gcc/config/i386/cpuid.h|  6 +++
 gcc/config/i386/i386-c.cc  |  4 ++
 gcc/config/i386/i386-isa.def   |  2 +
 gcc/config/i386/i386-options.cc| 26 ++-
 gcc/config/i386/i386.cc| 18 ++--
 gcc/config/i386/i386.h |  3 ++
 gcc/config/i386/i386.opt   | 19 
 gcc/doc/extend.texi| 13 ++
 gcc/doc/invoke.texi| 16 +--
 gcc/doc/sourcebuild.texi   |  9 
 gcc/testsuite/g++.target/i386/mv33.C   | 30 
 gcc/testsuite/gcc.target/i386/avx10_1-1.c  | 22 +
 gcc/testsuite/gcc.target/i386/avx10_1-10.c | 13 ++
 gcc/testsuite/gcc.target/i386/avx10_1-2.c  | 13 ++
 gcc/testsuite/gcc.target/i386/avx10_1-3.c  | 13 ++
 gcc/testsuite/gcc.target/i386/avx10_1-4.c  | 13 ++
 gcc/testsuite/gcc.target/i386/avx10_1-5.c  | 13 ++
 gcc/testsuite/gcc.target/i386/avx10_1-6.c  | 13 ++
 gcc/testsuite/gcc.target/i386/avx10_1-7.c  | 13 ++
 gcc/testsuite/gcc.target/i386/avx10_1-8.c  |  4 ++
 gcc/testsuite/gcc.target/i386/avx10_1-9.c  | 13 ++
 26 files changed, 366 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/mv33.C
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-8.c
 create mode 100644 

[PATCH 2/3] Emit a warning when disabling AVX512 with AVX10 enabled or disabling AVX10 with AVX512 enabled

2023-08-08 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* config/i386/driver-i386.cc (host_detect_local_cpu):
Do not append -mno-avx10.1 for -march=native.
* common/config/i386/i386-common.cc
(ix86_check_avx10): New function to check isa_flags and
isa_flags_explicit to emit warning when AVX10 is enabled
by "-m" option.
(ix86_check_avx512): New function to check isa_flags and
isa_flags_explicit to emit warning when AVX512 is enabled
by "-m" option.
(ix86_handle_option): Do not change the flags when warning
is emitted.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_1-11.c: New test.
* gcc.target/i386/avx10_1-12.c: Ditto.
* gcc.target/i386/avx10_1-13.c: Ditto.
* gcc.target/i386/avx10_1-14.c: Ditto.
---
 gcc/common/config/i386/i386-common.cc  | 68 +-
 gcc/config/i386/driver-i386.cc |  2 +-
 gcc/testsuite/gcc.target/i386/avx10_1-11.c |  5 ++
 gcc/testsuite/gcc.target/i386/avx10_1-12.c | 13 +
 gcc/testsuite/gcc.target/i386/avx10_1-13.c |  5 ++
 gcc/testsuite/gcc.target/i386/avx10_1-14.c | 13 +
 6 files changed, 91 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-14.c

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index 6c3bebb1846..ec94251dd4c 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -388,6 +388,46 @@ set_malign_value (const char **flag, unsigned value)
   *flag = r;
 }
 
+/* Emit a warning when using -mno-avx512{f,vl,bw,dq,cd,bf16,fp16,vbmi,vbmi2,
+   vnni,ifma,bitalg,vpopcntdq} with -mavx10.1 and above.  */
+static bool
+ix86_check_avx10 (struct gcc_options *opts)
+{
+  if (opts->x_ix86_isa_flags2 & opts->x_ix86_isa_flags2_explicit
+  & OPTION_MASK_ISA2_AVX10_1)
+{
+  warning (0, "%<-mno-avx512{f,vl,bw,dq,cd,bf16,fp16,vbmi,vbmi2,vnni,ifma,"
+  "bitalg,vpopcntdq}%> are ignored with %<-mavx10.1%> and above");
+  return false;
+}
+
+  return true;
+}
+
+/* Emit a warning when using -mno-avx10.1 with -mavx512{f,vl,bw,dq,cd,bf16,
+   fp16,vbmi,vbmi2,vnni,ifma,bitalg,vpopcntdq}.  */
+static bool
+ix86_check_avx512 (struct gcc_options *opts)
+{
+  if ((opts->x_ix86_isa_flags & opts->x_ix86_isa_flags_explicit
+   & (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD
+ | OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512BW
+ | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512IFMA
+ | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI2
+ | OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VPOPCNTDQ
+ | OPTION_MASK_ISA_AVX512BITALG))
+  || (opts->x_ix86_isa_flags2 & opts->x_ix86_isa_flags2_explicit
+ & (OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_AVX512BF16)))
+{
+  warning (0, "%<-mno-avx10.1%> is ignored when using with "
+  "%<-mavx512{f,vl,bw,dq,cd,bf16,fp16,vbmi,vbmi2,vnni,"
+  "ifma,bitalg,vpopcntdq}%>");
+  return false;
+}
+
+  return true;
+}
+
 /* Implement TARGET_HANDLE_OPTION.  */
 
 bool
@@ -609,7 +649,7 @@ ix86_handle_option (struct gcc_options *opts,
  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F_SET;
  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_SET;
}
-  else
+  else if (ix86_check_avx10 (opts))
{
  opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512F_UNSET;
  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_UNSET;
@@ -624,7 +664,7 @@ ix86_handle_option (struct gcc_options *opts,
  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD_SET;
  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512CD_SET;
}
-  else
+  else if (ix86_check_avx10 (opts))
{
  opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512CD_UNSET;
  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512CD_UNSET;
@@ -898,7 +938,7 @@ ix86_handle_option (struct gcc_options *opts,
  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI2_SET;
  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512VBMI2_SET;
}
-  else
+  else if (ix86_check_avx10 (opts))
{
  opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512VBMI2_UNSET;
  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512VBMI2_UNSET;
@@ -913,7 +953,7 @@ ix86_handle_option (struct gcc_options *opts,
  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512FP16_SET;
  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512FP16_SET;
}
-  else
+  else if (ix86_check_avx10 (opts))
{
  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512FP16_UNSET;
  

[PATCH 3/3] Emit a warning when AVX10 options conflict in vector width

2023-08-08 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* config/i386/driver-i386.cc (host_detect_local_cpu):
Do not append -mno-avx10-max-512bit for -march=native.
* common/config/i386/i386-common.cc
(ix86_check_avx10_vector_width): New function to check isa_flags
to emit a warning when there is a conflict in AVX10 options for
vector width.
(ix86_handle_option): Add check for avx10.1-256 and avx10.1-512.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_1-15.c: New test.
* gcc.target/i386/avx10_1-16.c: Ditto.
* gcc.target/i386/avx10_1-17.c: Ditto.
* gcc.target/i386/avx10_1-18.c: Ditto.
---
 gcc/common/config/i386/i386-common.cc  | 20 
 gcc/config/i386/driver-i386.cc |  3 ++-
 gcc/config/i386/i386-options.cc|  2 +-
 gcc/testsuite/gcc.target/i386/avx10_1-15.c |  5 +
 gcc/testsuite/gcc.target/i386/avx10_1-16.c |  5 +
 gcc/testsuite/gcc.target/i386/avx10_1-17.c | 13 +
 gcc/testsuite/gcc.target/i386/avx10_1-18.c | 13 +
 7 files changed, 59 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-17.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-18.c

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index ec94251dd4c..db88befc9b8 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -428,6 +428,24 @@ ix86_check_avx512 (struct gcc_options *opts)
   return true;
 }
 
+/* Emit a warning when there is a conflict vector width in AVX10 options.  */
+static void
+ix86_check_avx10_vector_width (struct gcc_options *opts, bool avx10_max_512)
+{
+  if (avx10_max_512)
+{
+  if (((opts->x_ix86_isa_flags2 | ~OPTION_MASK_ISA2_AVX10_512BIT)
+  == ~OPTION_MASK_ISA2_AVX10_512BIT)
+ && (opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_AVX10_512BIT))
+   warning (0, "The options used for AVX10 have conflict vector width, "
+"using the latter 512 as vector width");
+}
+  else if (opts->x_ix86_isa_flags2 & opts->x_ix86_isa_flags2_explicit
+  & OPTION_MASK_ISA2_AVX10_512BIT)
+warning (0, "The options used for AVX10 have conflict vector width, "
+"using the latter 256 as vector width");
+}
+
 /* Implement TARGET_HANDLE_OPTION.  */
 
 bool
@@ -1415,6 +1433,7 @@ ix86_handle_option (struct gcc_options *opts,
   return true;
 
 case OPT_mavx10_1_256:
+  ix86_check_avx10_vector_width (opts, false);
   opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_1_SET;
   opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_SET;
   opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_512BIT_SET;
@@ -1424,6 +1443,7 @@ ix86_handle_option (struct gcc_options *opts,
   return true;
 
 case OPT_mavx10_1_512:
+  ix86_check_avx10_vector_width (opts, true);
   opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_1_SET;
   opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_SET;
   opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_512BIT_SET;
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index 227ace6ff83..f4551a74e3a 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -854,7 +854,8 @@ const char *host_detect_local_cpu (int argc, const char 
**argv)
  options = concat (options, " ",
isa_names_table[i].option, NULL);
  }
-   else if (isa_names_table[i].feature != FEATURE_AVX10_1)
+   else if ((isa_names_table[i].feature != FEATURE_AVX10_1)
+&& (isa_names_table[i].feature != FEATURE_AVX10_512BIT))
  options = concat (options, neg_option,
isa_names_table[i].option + 2, NULL);
  }
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index b2281fbd4b5..8f9b825b527 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -985,7 +985,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree 
args, char *p_strings[],
 ix86_opt_ix86_no,
 ix86_opt_str,
 ix86_opt_enum,
-ix86_opt_isa,
+ix86_opt_isa
   };
 
   static const struct
diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-15.c 
b/gcc/testsuite/gcc.target/i386/avx10_1-15.c
new file mode 100644
index 000..fd873c9694c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_1-15.c
@@ -0,0 +1,5 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=x86-64 -mavx10.1-512 -mavx10.1-256" } */
+/* { dg-warning "The options used for AVX10 have conflict vector width, using 
the latter 256 as vector width" "" { target *-*-* } 0 } */
+
+#include "avx10_1-1.c"
diff --git 

[PATCH] Fix a typo

2023-07-21 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch fixes a typo; it does not cause any behavior difference.

Committed as an obvious change.

Thx,
Haochen

gcc/ChangeLog:

* config/i386/i386.opt: Fix a typo.
---
 gcc/config/i386/i386.opt | 5 -
 1 file changed, 5 deletions(-)

diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index db9956885e2..1cc8563477a 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1289,11 +1289,6 @@ Target Mask(ISA2_SM3) Var(ix86_isa_flags2) Save
 Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and
 SM3 built-in functions and code generation.
 
-mvpinsrvpextr
-Target Mask(ISA2_VPINSRVPEXTR) Var(ix86_isa_flags2) Save
-Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F,
-AVX512VL and VPINSRVPEXTR built-in functions and code generation.
-
 msha512
 Target Mask(ISA2_SHA512) Var(ix86_isa_flags2) Save
 Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and
-- 
2.31.1



[PATCH] Correct Granite Rapids{, D} documentation

2023-07-19 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch fixes a documentation error in invoke.texi, which previously
listed AVX512VP2INTERSECT for GNR and GNR-D.

Committed as an obvious change and backported to the GCC 13 branch.

Thx,
Haochen

gcc/ChangeLog:

* doc/invoke.texi: Remove AVX512VP2INTERSECT in
Granite Rapids{, D} from documentation.
---
 gcc/doc/invoke.texi | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 88e3c625030..d3c821e208a 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -32697,9 +32697,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, 
RDSEED, ADCX, PREFETCHW,
 AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
 AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, 
AVX512VBMI2,
 VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, 
AVX512-FP16,
-AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
+MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
+UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16, AVX512BF16, 
AMX-FP16
+and PREFETCHI instruction set support.
 
 @item graniterapids-d
 Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
@@ -32708,9 +32708,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, 
RDSEED, ADCX, PREFETCHW,
 AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
 AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, 
AVX512VBMI2,
 VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
-AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support.
+MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
+UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, AVX512BF16, 
AMX-FP16,
+PREFETCHI and AMX-COMPLEX instruction set support.
 
 @item k6
 AMD K6 CPU with MMX instruction set support.
-- 
2.31.1



[PATCH] [gcc-wwwdocs]gcc-13/14: Mention Intel new ISA and march support

2023-07-16 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch adds documentation to wwwdocs to mention the recent introduction
of new Intel ISA extensions and -march options.

Ok for trunk?

BRs,
Haochen

---
 htdocs/gcc-13/changes.html |  4 
 htdocs/gcc-14/changes.html | 34 +-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index 39414e18..68e8c5cc 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -593,6 +593,10 @@ You may also want to check out our
   
   GCC now supports the Intel CPU named Granite Rapids through
 -march=graniterapids.
+The switch enables the AMX-FP16, PREFETCHI ISA extensions.
+  
+  GCC now supports the Intel CPU named Granite Rapids D through
+-march=graniterapids-d.
 The switch enables the AMX-FP16, PREFETCHI and AMX-COMPLEX ISA extensions.
   
   GCC now supports AMD CPUs based on the znver4 core
diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 3f797642..dad1ba53 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -108,7 +108,39 @@ a work-in-progress.
 
 
 
-
+IA-32/x86-64
+
+  New ISA extension support for Intel AVX-VNNI-INT16 was added.
+  AVX-VNNI-INT16 intrinsics are available via the 
-mavxvnniint16
+  compiler switch.
+  
+  New ISA extension support for Intel SHA512 was added.
+  SHA512 intrinsics are available via the -msha512
+  compiler switch.
+  
+  New ISA extension support for Intel SM3 was added.
+  SM3 intrinsics are available via the -msm3
+  compiler switch.
+  
+  New ISA extension support for Intel SM4 was added.
+  SM4 intrinsics are available via the -msm4
+  compiler switch.
+  
+  GCC now supports the Intel CPU named Arrow Lake through
+-march=arrowlake.
+Based on Alder Lake, the switch further enables the AVX-IFMA,
+AVX-VNNI-INT8, AVX-NE-CONVERT and CMPccXADD ISA extensions.
+  
+  GCC now supports the Intel CPU named Arrow Lake S through
+-march=arrowlake-s.
+Based on Arrow Lake, the switch further enables the AVX-VNNI-INT16, SHA512,
+SM3 and SM4 ISA extensions.
+  
+  GCC now supports the Intel CPU named Lunar Lake through
+-march=lunarlake.
+Lunar Lake is based on Arrow Lake S.
+  
+
 
 
 
-- 
2.31.1



[PATCH] i386: Auto vectorize usdot_prod, udot_prod with AVXVNNIINT16 instruction.

2023-07-14 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch aims to auto-vectorize usdot_prod and udot_prod with the newly
introduced AVX-VNNI-INT16 instructions.

I also cleaned up a redundant mode iterator in this patch.

Regtested on x86_64-pc-linux-gnu. Ok for trunk after the AVX-VNNI-INT16 patch
is checked in?

BRs,
Haochen

gcc/ChangeLog:

* config/i386/sse.md (VI2_AVX2): Delete V32HI since we actually
have the same iterator. Also rename all the occurrences to
VI2_AVX2_AVX512BW.
(usdot_prod): New define_expand.
(udot_prod): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/vnniint16-auto-vectorize-1.c: New test.
* gcc.target/i386/vnniint16-auto-vectorize-2.c: Ditto.
---
 gcc/config/i386/sse.md| 98 +--
 .../i386/vnniint16-auto-vectorize-1.c | 28 ++
 .../i386/vnniint16-auto-vectorize-2.c | 76 ++
 3 files changed, 172 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-2.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7471932b27e..98e7f9334bc 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -545,6 +545,9 @@
V32HI (V16HI "TARGET_AVX512VL")])
 
 (define_mode_iterator VI2_AVX2
+  [(V16HI "TARGET_AVX2") V8HI])
+
+(define_mode_iterator VI2_AVX2_AVX512BW
   [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
 
 (define_mode_iterator VI2_AVX512F
@@ -637,9 +640,6 @@
(V16HI "TARGET_AVX2") V8HI
(V8SI "TARGET_AVX2") V4SI])
 
-(define_mode_iterator VI2_AVX2_AVX512BW
-  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
-
 (define_mode_iterator VI248_AVX512VL
   [V32HI V16SI V8DI
(V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
@@ -15298,16 +15298,16 @@
 })
 
 (define_expand "mul3"
-  [(set (match_operand:VI2_AVX2 0 "register_operand")
-   (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
-  (match_operand:VI2_AVX2 2 "vector_operand")))]
+  [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand")
+   (mult:VI2_AVX2_AVX512BW (match_operand:VI2_AVX2_AVX512BW 1 
"vector_operand")
+  (match_operand:VI2_AVX2_AVX512BW 2 "vector_operand")))]
   "TARGET_SSE2 &&  && "
   "ix86_fixup_binary_operands_no_copy (MULT, mode, operands);")
 
 (define_insn "*mul3"
-  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,")
-   (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,")
-  (match_operand:VI2_AVX2 2 "vector_operand" 
"xBm,m")))]
+  [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,")
+   (mult:VI2_AVX2_AVX512BW (match_operand:VI2_AVX2_AVX512BW 1 
"vector_operand" "%0,")
+  (match_operand:VI2_AVX2_AVX512BW 2 "vector_operand" 
"xBm,m")))]
   "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
&&  && "
   "@
@@ -15320,28 +15320,28 @@
(set_attr "mode" "")])
 
 (define_expand "mul3_highpart"
-  [(set (match_operand:VI2_AVX2 0 "register_operand")
-   (truncate:VI2_AVX2
+  [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand")
+   (truncate:VI2_AVX2_AVX512BW
  (lshiftrt:
(mult:
  (any_extend:
-   (match_operand:VI2_AVX2 1 "vector_operand"))
+   (match_operand:VI2_AVX2_AVX512BW 1 "vector_operand"))
  (any_extend:
-   (match_operand:VI2_AVX2 2 "vector_operand")))
+   (match_operand:VI2_AVX2_AVX512BW 2 "vector_operand")))
(const_int 16]
   "TARGET_SSE2
&&  && "
   "ix86_fixup_binary_operands_no_copy (MULT, mode, operands);")
 
 (define_insn "*mul3_highpart"
-  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,")
-   (truncate:VI2_AVX2
+  [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,")
+   (truncate:VI2_AVX2_AVX512BW
  (lshiftrt:
(mult:
  (any_extend:
-   (match_operand:VI2_AVX2 1 "vector_operand" "%0,"))
+   (match_operand:VI2_AVX2_AVX512BW 1 "vector_operand" 
"%0,"))
  (any_extend:
-   (match_operand:VI2_AVX2 2 "vector_operand" "xBm,m")))
+   (match_operand:VI2_AVX2_AVX512BW 2 "vector_operand" 
"xBm,m")))
(const_int 16]
   "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
&&  && "
@@ -15591,8 +15591,8 @@
 (define_insn "avx512bw_pmaddwd512"
   [(set (match_operand: 0 "register_operand" "=v")
   (unspec:
-[(match_operand:VI2_AVX2 1 "register_operand" "v")
- (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
+[(match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "v")
+ (match_operand:VI2_AVX2_AVX512BW 2 "nonimmediate_operand" "vm")]
  UNSPEC_PMADDWD512))]
"TARGET_AVX512BW && "
"vpmaddwd\t{%2, %1, %0|%0, %1, %2}";
@@ -21569,16 +21569,16 @@
 })
 
 (define_expand 

[PATCH 1/4] Support Intel AVX-VNNI-INT16

2023-07-13 Thread Haochen Jiang via Gcc-patches
From: Kong Lingling 

gcc/ChangeLog

* common/config/i386/cpuinfo.h (get_available_features): Detect
avxvnniint16.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AVXVNNIINT16_SET): New.
(OPTION_MASK_ISA2_AVXVNNIINT16_UNSET): Ditto.
(ix86_handle_option): Handle -mavxvnniint16.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_AVXVNNIINT16.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
avxvnniint16.
* config.gcc: Add avxvnniint16intrin.h.
* config/i386/avxvnniint16intrin.h: New file.
* config/i386/cpuid.h (bit_AVXVNNIINT16): New.
* config/i386/i386-builtin.def: Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__AVXVNNIINT16__.
* config/i386/i386-options.cc (isa2_opts): Add -mavxvnniint16.
(ix86_valid_target_attribute_inner_p): Handle avxvnniint16.
* config/i386/i386-isa.def: Add DEF_PTA(AVXVNNIINT16).
* config/i386/i386.opt: Add option -mavxvnniint16.
* config/i386/immintrin.h: Include avxvnniint16intrin.h.
* config/i386/sse.md
(vpdp_): New define_insn.
* doc/extend.texi: Document avxvnniint16.
* doc/invoke.texi: Document -mavxvnniint16.
* doc/sourcebuild.texi: Document target avxvnniint16.

gcc/testsuite/ChangeLog

* g++.dg/other/i386-2.C: Add -mavxvnniint16.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/avx-check.h: Add avxvnniint16 check.
* gcc.target/i386/sse-12.c: Add -mavxvnniint16.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* lib/target-supports.exp
(check_effective_target_avxvnniint16): New.
* gcc.target/i386/avxvnniint16-1.c: Ditto.
* gcc.target/i386/avxvnniint16-vpdpwusd-2.c: Ditto.
* gcc.target/i386/avxvnniint16-vpdpwusds-2.c: Ditto.
* gcc.target/i386/avxvnniint16-vpdpwsud-2.c: Ditto.
* gcc.target/i386/avxvnniint16-vpdpwsuds-2.c: Ditto.
* gcc.target/i386/avxvnniint16-vpdpwuud-2.c: Ditto.
* gcc.target/i386/avxvnniint16-vpdpwuuds-2.c: Ditto.

Co-authored-by: Haochen Jiang 
---
 gcc/common/config/i386/cpuinfo.h  |   2 +
 gcc/common/config/i386/i386-common.cc |  22 ++-
 gcc/common/config/i386/i386-cpuinfo.h |   1 +
 gcc/common/config/i386/i386-isas.h|   2 +
 gcc/config.gcc|   2 +-
 gcc/config/i386/avxvnniint16intrin.h  | 138 ++
 gcc/config/i386/cpuid.h   |   1 +
 gcc/config/i386/i386-builtin.def  |  14 ++
 gcc/config/i386/i386-c.cc |   2 +
 gcc/config/i386/i386-isa.def  |   1 +
 gcc/config/i386/i386-options.cc   |   4 +-
 gcc/config/i386/i386.opt  |   5 +
 gcc/config/i386/immintrin.h   |   2 +
 gcc/config/i386/sse.md|  32 
 gcc/doc/extend.texi   |   5 +
 gcc/doc/invoke.texi   |  10 +-
 gcc/doc/sourcebuild.texi  |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |   2 +-
 gcc/testsuite/gcc.target/i386/avx-check.h |   3 +
 .../gcc.target/i386/avxvnniint16-1.c  |  43 ++
 .../gcc.target/i386/avxvnniint16-vpdpwsud-2.c |  71 +
 .../i386/avxvnniint16-vpdpwsuds-2.c   |  72 +
 .../gcc.target/i386/avxvnniint16-vpdpwusd-2.c |  71 +
 .../i386/avxvnniint16-vpdpwusds-2.c   |  72 +
 .../gcc.target/i386/avxvnniint16-vpdpwuud-2.c |  71 +
 .../i386/avxvnniint16-vpdpwuuds-2.c   |  71 +
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 gcc/testsuite/gcc.target/i386/sse-12.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c|   4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|   2 +-
 gcc/testsuite/lib/target-supports.exp |  12 ++
 34 files changed, 735 insertions(+), 15 deletions(-)
 create mode 100644 gcc/config/i386/avxvnniint16intrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint16-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint16-vpdpwsud-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint16-vpdpwsuds-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint16-vpdpwusd-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint16-vpdpwusds-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint16-vpdpwuud-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint16-vpdpwuuds-2.c

[PATCH 3/4] Support Intel SHA512

2023-07-13 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect SHA512.
* common/config/i386/i386-common.cc (OPTION_MASK_ISA2_SHA512_SET,
OPTION_MASK_ISA2_SHA512_UNSET): New.
(OPTION_MASK_ISA2_AVX_UNSET): Add SHA512.
(ix86_handle_option): Handle -msha512.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_SHA512.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
sha512.
* config.gcc: Add sha512intrin.h.
* config/i386/cpuid.h (bit_SHA512): New.
* config/i386/i386-builtin-types.def:
Add DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V2DI).
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__SHA512__.
* config/i386/i386-expand.cc (ix86_expand_args_builtin): Handle
V4DI_FTYPE_V4DI_V4DI_V2DI and V4DI_FTYPE_V4DI_V2DI.
* config/i386/i386-isa.def (SHA512): Add DEF_PTA(SHA512).
* config/i386/i386-options.cc (isa2_opts): Add -msha512.
(ix86_valid_target_attribute_inner_p): Handle sha512.
* config/i386/i386.opt: Add option -msha512.
* config/i386/immintrin.h: Include sha512intrin.h.
* config/i386/sse.md (vsha512msg1): New define insn.
(vsha512msg2): Ditto.
(vsha512rnds2): Ditto.
* doc/extend.texi: Document sha512.
* doc/invoke.texi: Document -msha512.
* doc/sourcebuild.texi: Document target sha512.
* config/i386/sha512intrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -msha512.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-12.c: Add -msha512.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add sha512.
* gcc.target/i386/sse-23.c: Ditto.
* lib/target-supports.exp (check_effective_target_sha512): New.
* gcc.target/i386/sha512-1.c: New test.
* gcc.target/i386/sha512-check.h: Ditto.
* gcc.target/i386/sha512msg1-2.c: Ditto.
* gcc.target/i386/sha512msg2-2.c: Ditto.
* gcc.target/i386/sha512rnds2-2.c: Ditto.
---
 gcc/common/config/i386/cpuinfo.h  |  2 +
 gcc/common/config/i386/i386-common.cc | 19 -
 gcc/common/config/i386/i386-cpuinfo.h |  1 +
 gcc/common/config/i386/i386-isas.h|  1 +
 gcc/config.gcc|  2 +-
 gcc/config/i386/cpuid.h   |  1 +
 gcc/config/i386/i386-builtin-types.def|  3 +
 gcc/config/i386/i386-builtin.def  |  5 ++
 gcc/config/i386/i386-c.cc |  2 +
 gcc/config/i386/i386-expand.cc|  2 +
 gcc/config/i386/i386-isa.def  |  1 +
 gcc/config/i386/i386-options.cc   |  4 +-
 gcc/config/i386/i386.opt  | 10 +++
 gcc/config/i386/immintrin.h   |  2 +
 gcc/config/i386/sha512intrin.h| 64 ++
 gcc/config/i386/sse.md| 40 +
 gcc/doc/extend.texi   |  5 ++
 gcc/doc/invoke.texi   | 10 ++-
 gcc/doc/sourcebuild.texi  |  3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |  2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |  2 +-
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
 gcc/testsuite/gcc.target/i386/sha512-1.c  | 18 
 gcc/testsuite/gcc.target/i386/sha512-check.h  | 43 ++
 gcc/testsuite/gcc.target/i386/sha512msg1-2.c  | 48 +++
 gcc/testsuite/gcc.target/i386/sha512msg2-2.c  | 47 ++
 gcc/testsuite/gcc.target/i386/sha512rnds2-2.c | 85 +++
 gcc/testsuite/gcc.target/i386/sse-12.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c|  4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|  2 +-
 gcc/testsuite/lib/target-supports.exp | 14 +++
 33 files changed, 436 insertions(+), 14 deletions(-)
 create mode 100644 gcc/config/i386/sha512intrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/sha512-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sha512-check.h
 create mode 100644 gcc/testsuite/gcc.target/i386/sha512msg1-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sha512msg2-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sha512rnds2-2.c

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index e5cdffe017a..0cfde3ebccd 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -879,6 +879,8 @@ get_available_features (struct __processor_model *cpu_model,
set_feature (FEATURE_AVXVNNIINT16);
  if (eax & bit_SM3)
  

[PATCH 2/4] Support Intel SM3

2023-07-13 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect SM3.
* common/config/i386/i386-common.cc (OPTION_MASK_ISA2_SM3_SET,
OPTION_MASK_ISA2_SM3_UNSET): New.
(OPTION_MASK_ISA2_AVX_UNSET): Add SM3.
(ix86_handle_option): Handle -msm3.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_SM3.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
SM3.
* config.gcc: Add sm3intrin.h
* config/i386/cpuid.h (bit_SM3): New.
* config/i386/i386-builtin-types.def:
Add DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V4SI, INT).
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__SM3__.
* config/i386/i386-expand.cc (ix86_expand_args_builtin): Handle
V4SI_FTYPE_V4SI_V4SI_V4SI_INT.
* config/i386/i386-isa.def (SM3): Add DEF_PTA(SM3).
* config/i386/i386-options.cc (isa2_opts): Add -msm3.
(ix86_valid_target_attribute_inner_p): Handle sm3.
* config/i386/i386.opt: Add option -msm3.
* config/i386/immintrin.h: Include sm3intrin.h.
* config/i386/sse.md (vsm3msg1): New define insn.
(vsm3msg2): Ditto.
(vsm3rnds2): Ditto.
* doc/extend.texi: Document sm3.
* doc/invoke.texi: Document -msm3.
* doc/sourcebuild.texi: Document target sm3.
* config/i386/sm3intrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -msm3.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/avx-1.c: Add new define for immediate.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-12.c: Add -msm3.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add sm3.
* gcc.target/i386/sse-23.c: Ditto.
* lib/target-supports.exp (check_effective_target_sm3): New.
* gcc.target/i386/sm3-1.c: New test.
* gcc.target/i386/sm3-check.h: Ditto.
* gcc.target/i386/sm3msg1-2.c: Ditto.
* gcc.target/i386/sm3msg2-2.c: Ditto.
* gcc.target/i386/sm3rnds2-2.c: Ditto.
---
 gcc/common/config/i386/cpuinfo.h  |   2 +
 gcc/common/config/i386/i386-common.cc |  20 +++-
 gcc/common/config/i386/i386-cpuinfo.h |   1 +
 gcc/common/config/i386/i386-isas.h|   1 +
 gcc/config.gcc|   3 +-
 gcc/config/i386/cpuid.h   |   1 +
 gcc/config/i386/i386-builtin-types.def|   3 +
 gcc/config/i386/i386-builtin.def  |   5 +
 gcc/config/i386/i386-c.cc |   2 +
 gcc/config/i386/i386-expand.cc|   1 +
 gcc/config/i386/i386-isa.def  |   1 +
 gcc/config/i386/i386-options.cc   |   2 +
 gcc/config/i386/i386.opt  |   5 +
 gcc/config/i386/immintrin.h   |   2 +
 gcc/config/i386/sm3intrin.h   |  72 
 gcc/config/i386/sse.md|  43 
 gcc/doc/extend.texi   |   5 +
 gcc/doc/invoke.texi   |   7 +-
 gcc/doc/sourcebuild.texi  |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |   2 +-
 gcc/testsuite/gcc.target/i386/avx-1.c |   3 +
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 gcc/testsuite/gcc.target/i386/sm3-1.c |  17 +++
 gcc/testsuite/gcc.target/i386/sm3-check.h |  37 +++
 gcc/testsuite/gcc.target/i386/sm3msg1-2.c |  54 +
 gcc/testsuite/gcc.target/i386/sm3msg2-2.c |  57 ++
 gcc/testsuite/gcc.target/i386/sm3rnds2-2.c| 104 ++
 gcc/testsuite/gcc.target/i386/sse-12.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c|   5 +-
 gcc/testsuite/gcc.target/i386/sse-14.c|   5 +-
 gcc/testsuite/gcc.target/i386/sse-22.c|   7 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|   5 +-
 gcc/testsuite/lib/target-supports.exp |  15 +++
 34 files changed, 484 insertions(+), 12 deletions(-)
 create mode 100644 gcc/config/i386/sm3intrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/sm3-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sm3-check.h
 create mode 100644 gcc/testsuite/gcc.target/i386/sm3msg1-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sm3msg2-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sm3rnds2-2.c

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 3599f9def2c..e5cdffe017a 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -877,6 +877,8 @@ get_available_features (struct __processor_model *cpu_model,
set_feature (FEATURE_AVXNECONVERT);
  if (edx & 

[PATCH 4/4] Support Intel SM4

2023-07-13 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect SM4.
* common/config/i386/i386-common.cc (OPTION_MASK_ISA2_SM4_SET,
OPTION_MASK_ISA2_SM4_UNSET): New.
(OPTION_MASK_ISA2_AVX_UNSET): Add SM4.
(ix86_handle_option): Handle -msm4.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_SM4.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
sm4.
* config.gcc: Add sm4intrin.h.
* config/i386/cpuid.h (bit_SM4): New.
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__SM4__.
* config/i386/i386-isa.def (SM4): Add DEF_PTA(SM4).
* config/i386/i386-options.cc (isa2_opts): Add -msm4.
(ix86_valid_target_attribute_inner_p): Handle sm4.
* config/i386/i386.opt: Add option -msm4.
* config/i386/immintrin.h: Include sm4intrin.h
* config/i386/sse.md (vsm4key4_): New define insn.
(vsm4rnds4_): Ditto.
* doc/extend.texi: Document sm4.
* doc/invoke.texi: Document -msm4.
* doc/sourcebuild.texi: Document target sm4.
* config/i386/sm4intrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -msm4.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-12.c: Add -msm4.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add sm4.
* gcc.target/i386/sse-23.c: Ditto.
* lib/target-supports.exp (check_effective_target_sm4): New.
* gcc.target/i386/sm4-1.c: New test.
* gcc.target/i386/sm4-check.h: Ditto.
* gcc.target/i386/sm4key4-2.c: Ditto.
* gcc.target/i386/sm4rnds4-2.c: Ditto.
---
 gcc/common/config/i386/cpuinfo.h  |   2 +
 gcc/common/config/i386/i386-common.cc |  20 +-
 gcc/common/config/i386/i386-cpuinfo.h |   1 +
 gcc/common/config/i386/i386-isas.h|   1 +
 gcc/config.gcc|   2 +-
 gcc/config/i386/cpuid.h   |   1 +
 gcc/config/i386/i386-builtin.def  |   6 +
 gcc/config/i386/i386-c.cc |   2 +
 gcc/config/i386/i386-isa.def  |   1 +
 gcc/config/i386/i386-options.cc   |   4 +-
 gcc/config/i386/i386.opt  |   5 +
 gcc/config/i386/immintrin.h   |   2 +
 gcc/config/i386/sm4intrin.h   |  70 +++
 gcc/config/i386/sse.md|  26 +++
 gcc/doc/extend.texi   |   5 +
 gcc/doc/invoke.texi   |   9 +-
 gcc/doc/sourcebuild.texi  |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |   2 +-
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 gcc/testsuite/gcc.target/i386/sm4-1.c |  20 ++
 gcc/testsuite/gcc.target/i386/sm4-check.h | 183 ++
 gcc/testsuite/gcc.target/i386/sm4key4-2.c |  14 ++
 gcc/testsuite/gcc.target/i386/sm4rnds4-2.c|  14 ++
 gcc/testsuite/gcc.target/i386/sse-12.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c|   4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|   2 +-
 gcc/testsuite/lib/target-supports.exp |  14 ++
 30 files changed, 409 insertions(+), 14 deletions(-)
 create mode 100644 gcc/config/i386/sm4intrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/sm4-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sm4-check.h
 create mode 100644 gcc/testsuite/gcc.target/i386/sm4key4-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sm4rnds4-2.c

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 0cfde3ebccd..f9434f038ea 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -881,6 +881,8 @@ get_available_features (struct __processor_model *cpu_model,
set_feature (FEATURE_SM3);
  if (eax & bit_SHA512)
set_feature (FEATURE_SHA512);
+ if (eax & bit_SM4)
+   set_feature (FEATURE_SM4);
}
   if (avx512_usable)
{
diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index 97c3cdfe5e1..610cabe52c1 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -122,6 +122,7 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA2_AVXVNNIINT16_SET OPTION_MASK_ISA2_AVXVNNIINT16
 #define OPTION_MASK_ISA2_SM3_SET OPTION_MASK_ISA2_SM3
 #define OPTION_MASK_ISA2_SHA512_SET OPTION_MASK_ISA2_SHA512
+#define OPTION_MASK_ISA2_SM4_SET 

[PATCH 0/4] Support Intel Arrow Lake/Lunar Lake ISAs

2023-07-13 Thread Haochen Jiang via Gcc-patches
Hi all,

These four patches aim to add support for Intel Arrow Lake/Lunar Lake
instructions, including AVX-VNNI-INT16, SM3, SHA512 and SM4.

The information is based on the newly released
Intel Architecture Instruction Set Extensions and Future Features.

The document is available at:
https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen




[PATCH] i386: Guard 128 bit VAES builtins with AVX512VL

2023-07-10 Thread Haochen Jiang via Gcc-patches
Hi all,

Currently on trunk, using either the intrinsics or the builtins for 128 bit
VAES can result in an ICE, because AVX512VL is not checked until the insn
pattern, which is not what users expect. This patch aims to fix that ICE and
report an error in this scenario instead.

Regtested on x86-64-linux-gnu{-m32,}. Ok for trunk?

BRs,
Haochen

Since commit 24a8acc, the 128 bit intrinsics are enabled for VAES. However,
AVX512VL is not checked until we reach the pattern, which results in an
ICE.

Add an AVX512VL guard to the builtins so that an error is reported when the
ISA flags are checked.

gcc/ChangeLog:

* config/i386/i386-builtins.cc (ix86_init_mmx_sse_builtins):
Add OPTION_MASK_ISA_AVX512VL.
* config/i386/i386-expand.cc (ix86_check_builtin_isa_match):
Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512vl-vaes-1.c: New test.
---
 gcc/config/i386/i386-builtins.cc| 12 
 gcc/config/i386/i386-expand.cc  |  4 +++-
 gcc/testsuite/gcc.target/i386/avx512vl-vaes-1.c | 12 
 3 files changed, 23 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vaes-1.c

diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index 28f404da288..e436ca4e5b1 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -662,19 +662,23 @@ ix86_init_mmx_sse_builtins (void)
   VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
 
   /* AES */
-  def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
+  def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2
+| OPTION_MASK_ISA_AVX512VL,
 OPTION_MASK_ISA2_VAES,
 "__builtin_ia32_aesenc128",
 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
-  def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
+  def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2
+| OPTION_MASK_ISA_AVX512VL,
 OPTION_MASK_ISA2_VAES,
 "__builtin_ia32_aesenclast128",
 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
-  def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
+  def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2
+| OPTION_MASK_ISA_AVX512VL,
 OPTION_MASK_ISA2_VAES,
 "__builtin_ia32_aesdec128",
 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
-  def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
+  def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2
+| OPTION_MASK_ISA_AVX512VL,
 OPTION_MASK_ISA2_VAES,
 "__builtin_ia32_aesdeclast128",
 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 567248d6830..9a04bf4455b 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -12626,6 +12626,7 @@ ix86_check_builtin_isa_match (unsigned int fcode,
OPTION_MASK_ISA2_AVXIFMA
  (OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA2_AVX512BF16) or
OPTION_MASK_ISA2_AVXNECONVERT
+ OPTION_MASK_ISA_AES or (OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA2_VAES)
  where for each such pair it is sufficient if either of the ISAs is
  enabled, plus if it is ored with other options also those others.
  OPTION_MASK_ISA_MMX in bisa is satisfied also if TARGET_MMX_WITH_SSE.  */
@@ -12649,7 +12650,8 @@ ix86_check_builtin_isa_match (unsigned int fcode,
 OPTION_MASK_ISA2_AVXIFMA);
   SHARE_BUILTIN (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, 0,
 OPTION_MASK_ISA2_AVXNECONVERT);
-  SHARE_BUILTIN (OPTION_MASK_ISA_AES, 0, 0, OPTION_MASK_ISA2_VAES);
+  SHARE_BUILTIN (OPTION_MASK_ISA_AES, 0, OPTION_MASK_ISA_AVX512VL,
+OPTION_MASK_ISA2_VAES);
   isa = tmp_isa;
   isa2 = tmp_isa2;
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vaes-1.c 
b/gcc/testsuite/gcc.target/i386/avx512vl-vaes-1.c
new file mode 100644
index 000..fabb170a031
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vaes-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mvaes -mno-avx512vl -mno-aes" } */
+
+#include <immintrin.h>
+
+typedef long long v2di __attribute__((vector_size (16)));
+
+v2di
+f1 (v2di x, v2di y)
+{
+  return __builtin_ia32_aesenc128 (x, y); /* { dg-error "needs isa option" } */
+}
-- 
2.31.1



[PATCH] i386: Share AES xmm intrin with VAES

2023-04-18 Thread Haochen Jiang via Gcc-patches
Hi all,

I realized that I attached an old version of my patch. We should change
the error message of pr109117-1.c, not pr84335.c.

Please review this patch.

Thx,
Haochen

gcc/ChangeLog:

* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AVX_UNSET): Add OPTION_MASK_ISA2_VAES_UNSET.
(ix86_handle_option): Set AVX flag for VAES.
* config/i386/i386-builtins.cc (ix86_init_mmx_sse_builtins):
Add OPTION_MASK_ISA2_VAES_UNSET.
(def_builtin): Share builtin between AES and VAES.
* config/i386/i386-expand.cc (ix86_check_builtin_isa_match):
Ditto.
* config/i386/i386.md (aes): New isa attribute.
* config/i386/sse.md (aesenc): Add pattern for VAES with xmm.
(aesenclast): Ditto.
(aesdec): Ditto.
(aesdeclast): Ditto.
* config/i386/vaesintrin.h: Remove redundant avx target push.
* config/i386/wmmintrin.h (_mm_aesdec_si128): Change to macro.
(_mm_aesdeclast_si128): Ditto.
(_mm_aesenc_si128): Ditto.
(_mm_aesenclast_si128): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512fvl-vaes-1.c: Add VAES xmm test.
* gcc.target/i386/pr109117-1.c: Modify error message.
---
 gcc/common/config/i386/i386-common.cc |  5 +-
 gcc/config/i386/i386-builtins.cc  | 21 ---
 gcc/config/i386/i386-expand.cc|  1 +
 gcc/config/i386/i386.md   |  3 +-
 gcc/config/i386/sse.md| 60 ++-
 gcc/config/i386/vaesintrin.h  |  4 +-
 gcc/config/i386/wmmintrin.h   | 29 +++--
 .../gcc.target/i386/avx512fvl-vaes-1.c| 11 
 gcc/testsuite/gcc.target/i386/pr109117-1.c|  4 +-
 9 files changed, 75 insertions(+), 63 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index c7954da8e34..bf126f14073 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -348,7 +348,8 @@ along with GCC; see the file COPYING3.  If not see
| OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET)
 #define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
   OPTION_MASK_ISA2_SSE_UNSET
-#define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET
+#define OPTION_MASK_ISA2_AVX_UNSET \
+  (OPTION_MASK_ISA2_AVX2_UNSET | OPTION_MASK_ISA2_VAES_UNSET)
 #define OPTION_MASK_ISA2_SSE4_2_UNSET OPTION_MASK_ISA2_AVX_UNSET
 #define OPTION_MASK_ISA2_SSE4_1_UNSET OPTION_MASK_ISA2_SSE4_2_UNSET
 #define OPTION_MASK_ISA2_SSE4_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET
@@ -685,6 +686,8 @@ ix86_handle_option (struct gcc_options *opts,
{
  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_VAES_SET;
  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_VAES_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
}
   else
{
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index fc0c82b156e..28f404da288 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -279,14 +279,15 @@ def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2,
   if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0)
   && (mask == 0 || (mask & ix86_isa_flags) != 0))
  || ((mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE)
- /* "Unified" builtin used by either AVXVNNI/AVXIFMA intrinsics
-or AVX512VNNIVL/AVX512IFMAVL non-mask intrinsics should be
-defined whenever avxvnni/avxifma or avx512vnni/avxifma &&
-avx512vl exist.  */
+ /* "Unified" builtin used by either AVXVNNI/AVXIFMA/AES intrinsics
+or AVX512VNNIVL/AVX512IFMAVL/VAESVL non-mask intrinsics should be
+defined whenever avxvnni/avxifma/aes or avx512vnni/avx512ifma/vaes
+&& avx512vl exist.  */
  || (mask2 == OPTION_MASK_ISA2_AVXVNNI)
  || (mask2 == OPTION_MASK_ISA2_AVXIFMA)
  || (mask2 == (OPTION_MASK_ISA2_AVXNECONVERT
| OPTION_MASK_ISA2_AVX512BF16))
+ || ((mask2 & OPTION_MASK_ISA2_VAES) != 0)
  || (lang_hooks.builtin_function
  == lang_hooks.builtin_function_ext_scope))
{
@@ -661,16 +662,20 @@ ix86_init_mmx_sse_builtins (void)
   VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
 
   /* AES */
-  def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
+  def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
+OPTION_MASK_ISA2_VAES,
 "__builtin_ia32_aesenc128",
 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
-  def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
+  def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
+OPTION_MASK_ISA2_VAES,
 "__builtin_ia32_aesenclast128",
   

[PATCH] i386: Share AES xmm intrin with VAES

2023-04-18 Thread Haochen Jiang via Gcc-patches
Hi all,

Currently in GCC, the 128 bit intrinsics for the vaes{enc,dec}{last,}
instructions are under the AES ISA. Because there is no dependency between
the AES and VAES ISA sets, the 128 bit intrinsics are not available when we
use the compiler flags -mvaes -mavx512vl, and there is no other way to use
them. But according to the Intel SDM they should be available.

Although VAES is meant to be a VEX/EVEX promotion of AES, it only covers part
of AES. Therefore, we share the AES xmm intrinsics with VAES.

Also, since -mvaes indicates that we could use VEX encoding for ymm, we
should imply AVX for VAES.

Tested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AVX_UNSET): Add OPTION_MASK_ISA2_VAES_UNSET.
(ix86_handle_option): Set AVX flag for VAES.
* config/i386/i386-builtins.cc (ix86_init_mmx_sse_builtins):
Add OPTION_MASK_ISA2_VAES_UNSET.
(def_builtin): Share builtin between AES and VAES.
* config/i386/i386-expand.cc (ix86_check_builtin_isa_match):
Ditto.
* config/i386/i386.md (aes): New isa attribute.
* config/i386/sse.md (aesenc): Add pattern for VAES with xmm.
(aesenclast): Ditto.
(aesdec): Ditto.
(aesdeclast): Ditto.
* config/i386/vaesintrin.h: Remove redundant avx target push.
* config/i386/wmmintrin.h (_mm_aesdec_si128): Change to macro.
(_mm_aesdeclast_si128): Ditto.
(_mm_aesenc_si128): Ditto.
(_mm_aesenclast_si128): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512fvl-vaes-1.c: Add VAES xmm test.
* gcc.target/i386/pr84335.c: Modify error message.
---
 gcc/common/config/i386/i386-common.cc |  5 +-
 gcc/config/i386/i386-builtins.cc  | 21 ---
 gcc/config/i386/i386-expand.cc|  1 +
 gcc/config/i386/i386.md   |  3 +-
 gcc/config/i386/sse.md| 60 ++-
 gcc/config/i386/vaesintrin.h  |  4 +-
 gcc/config/i386/wmmintrin.h   | 29 +++--
 .../gcc.target/i386/avx512fvl-vaes-1.c| 11 
 gcc/testsuite/gcc.target/i386/pr84335.c   |  4 +-
 9 files changed, 75 insertions(+), 63 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index c7954da8e34..bf126f14073 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -348,7 +348,8 @@ along with GCC; see the file COPYING3.  If not see
| OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET)
 #define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
   OPTION_MASK_ISA2_SSE_UNSET
-#define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET
+#define OPTION_MASK_ISA2_AVX_UNSET \
+  (OPTION_MASK_ISA2_AVX2_UNSET | OPTION_MASK_ISA2_VAES_UNSET)
 #define OPTION_MASK_ISA2_SSE4_2_UNSET OPTION_MASK_ISA2_AVX_UNSET
 #define OPTION_MASK_ISA2_SSE4_1_UNSET OPTION_MASK_ISA2_SSE4_2_UNSET
 #define OPTION_MASK_ISA2_SSE4_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET
@@ -685,6 +686,8 @@ ix86_handle_option (struct gcc_options *opts,
{
  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_VAES_SET;
  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_VAES_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
}
   else
{
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index fc0c82b156e..28f404da288 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -279,14 +279,15 @@ def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2,
   if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0)
   && (mask == 0 || (mask & ix86_isa_flags) != 0))
  || ((mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE)
- /* "Unified" builtin used by either AVXVNNI/AVXIFMA intrinsics
-or AVX512VNNIVL/AVX512IFMAVL non-mask intrinsics should be
-defined whenever avxvnni/avxifma or avx512vnni/avxifma &&
-avx512vl exist.  */
+ /* "Unified" builtin used by either AVXVNNI/AVXIFMA/AES intrinsics
+or AVX512VNNIVL/AVX512IFMAVL/VAESVL non-mask intrinsics should be
+defined whenever avxvnni/avxifma/aes or avx512vnni/avx512ifma/vaes
+&& avx512vl exist.  */
  || (mask2 == OPTION_MASK_ISA2_AVXVNNI)
  || (mask2 == OPTION_MASK_ISA2_AVXIFMA)
  || (mask2 == (OPTION_MASK_ISA2_AVXNECONVERT
| OPTION_MASK_ISA2_AVX512BF16))
+ || ((mask2 & OPTION_MASK_ISA2_VAES) != 0)
  || (lang_hooks.builtin_function
  == lang_hooks.builtin_function_ext_scope))
{
@@ -661,16 +662,20 @@ ix86_init_mmx_sse_builtins (void)
   VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
 
   /* AES */
-  def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
+  def_builtin_const 

[PATCH] i386: Add PCLMUL dependency for VPCLMULQDQ

2023-04-18 Thread Haochen Jiang via Gcc-patches
Hi all,

Currently in GCC, the 128 bit intrinsic for the vpclmulqdq instruction is
under the PCLMUL ISA. Because there is no dependency between the PCLMUL
and VPCLMULQDQ ISA sets, the 128 bit intrinsic is not available when we only
use the compiler flag -mvpclmulqdq. But according to the Intel SDM it should be.

Since VPCLMULQDQ is a VEX/EVEX promotion for PCLMUL, it is natural to
add dependency between them.

Also, with -mvpclmulqdq, we can use ymm under VEX encoding, so
VPCLMULQDQ should imply AVX.

Tested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* common/config/i386/i386-common.cc
(OPTION_MASK_ISA_VPCLMULQDQ_SET):
Add OPTION_MASK_ISA_PCLMUL_SET and OPTION_MASK_ISA_AVX_SET.
(OPTION_MASK_ISA_AVX_UNSET):
Add OPTION_MASK_ISA_VPCLMULQDQ_UNSET.
(OPTION_MASK_ISA_PCLMUL_UNSET): Ditto.
* config/i386/i386.md (vpclmulqdqvl): New.
* config/i386/sse.md (pclmulqdq): Add evex encoding.
* config/i386/vpclmulqdqintrin.h: Remove redundant avx target
push.

gcc/testsuite/ChangeLog:

* gcc.target/i386/vpclmulqdq.c: Add compile test for xmm.
---
 gcc/common/config/i386/i386-common.cc  |  9 ++---
 gcc/config/i386/i386.md|  4 +++-
 gcc/config/i386/sse.md | 11 ++-
 gcc/config/i386/vpclmulqdqintrin.h |  4 ++--
 gcc/testsuite/gcc.target/i386/vpclmulqdq.c |  3 +++
 5 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index 315db854862..c7954da8e34 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -171,7 +171,9 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_GFNI_SET OPTION_MASK_ISA_GFNI
 #define OPTION_MASK_ISA_SHSTK_SET OPTION_MASK_ISA_SHSTK
 #define OPTION_MASK_ISA2_VAES_SET OPTION_MASK_ISA2_VAES
-#define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ
+#define OPTION_MASK_ISA_VPCLMULQDQ_SET \
+  (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_PCLMUL_SET \
+   | OPTION_MASK_ISA_AVX_SET)
 #define OPTION_MASK_ISA_MOVDIRI_SET OPTION_MASK_ISA_MOVDIRI
 #define OPTION_MASK_ISA2_MOVDIR64B_SET OPTION_MASK_ISA2_MOVDIR64B
 #define OPTION_MASK_ISA2_WAITPKG_SET OPTION_MASK_ISA2_WAITPKG
@@ -211,7 +213,7 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_AVX_UNSET \
   (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
| OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \
-   | OPTION_MASK_ISA_AVX2_UNSET )
+   | OPTION_MASK_ISA_AVX2_UNSET | OPTION_MASK_ISA_VPCLMULQDQ_UNSET)
 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
 #define OPTION_MASK_ISA_FXSR_UNSET OPTION_MASK_ISA_FXSR
 #define OPTION_MASK_ISA_XSAVE_UNSET \
@@ -314,7 +316,8 @@ along with GCC; see the file COPYING3.  If not see
 
 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
 #define OPTION_MASK_ISA_SHA_UNSET OPTION_MASK_ISA_SHA
-#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
+#define OPTION_MASK_ISA_PCLMUL_UNSET \
+  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_VPCLMULQDQ_UNSET)
 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
 #define OPTION_MASK_ISA2_PCONFIG_UNSET OPTION_MASK_ISA2_PCONFIG
 #define OPTION_MASK_ISA2_WBNOINVD_UNSET OPTION_MASK_ISA2_WBNOINVD
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ed689b044c3..acc994226e7 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -841,7 +841,7 @@
avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma,
-   avx512ifmavl,avxneconvert,avx512bf16vl"
+   avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl"
   (const_string "base"))
 
 ;; Define instruction set of MMX instructions
@@ -903,6 +903,8 @@
 (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT")
 (eq_attr "isa" "avx512bf16vl")
   (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
+(eq_attr "isa" "vpclmulqdqvl")
+  (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
 
 (eq_attr "mmx_isa" "native")
   (symbol_ref "!TARGET_MMX_WITH_SSE")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 26812ab6106..33e281901cf 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -25195,20 +25195,21 @@
(set_attr "mode" "TI")])
 
 (define_insn "pclmulqdq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
-   (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
- (match_operand:V2DI 2 "vector_operand" "xBm,xm")
+  [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
+   (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
+ (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")
   

[PATCH] i386: Fix vpblendm{b,w} intrins and insns

2023-04-18 Thread Haochen Jiang via Gcc-patches
Hi all,

vpblendm{b,w} actually do not have constant parameters.
Therefore, there is no need for them to be wrapped in __OPTIMIZE__.

Also, we should check TARGET_AVX512VL for 128/256 bit vectors in patterns.

This patch did the fixes mentioned above. Tested on x86_64-pc-linux-gnu.
Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* config/i386/avx512vlbwintrin.h
(_mm_mask_blend_epi16): Remove __OPTIMIZE__ wrapper.
(_mm_mask_blend_epi8): Ditto.
(_mm256_mask_blend_epi16): Ditto.
(_mm256_mask_blend_epi8): Ditto.
* config/i386/avx512vlintrin.h
(_mm256_mask_blend_pd): Ditto.
(_mm256_mask_blend_ps): Ditto.
(_mm256_mask_blend_epi64): Ditto.
(_mm256_mask_blend_epi32): Ditto.
(_mm_mask_blend_pd): Ditto.
(_mm_mask_blend_ps): Ditto.
(_mm_mask_blend_epi64): Ditto.
(_mm_mask_blend_epi32): Ditto.
* config/i386/sse.md (VF_AVX512BWHFBF16): Removed.
(VF_AVX512HFBFVL): Move it before the first usage.
(_blendm): Change iterator from VF_AVX512BWHFBF16
to VF_AVX512HFBFVL.
---
 gcc/config/i386/avx512vlbwintrin.h |  92 ++-
 gcc/config/i386/avx512vlintrin.h   | 184 +++--
 gcc/config/i386/sse.md |  17 ++-
 3 files changed, 115 insertions(+), 178 deletions(-)

diff --git a/gcc/config/i386/avx512vlbwintrin.h 
b/gcc/config/i386/avx512vlbwintrin.h
index 0232783a362..9d2aba2a8ff 100644
--- a/gcc/config/i386/avx512vlbwintrin.h
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -257,6 +257,42 @@ _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
 (__mmask16) __U);
 }
 
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
+{
+  return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
+   (__v8hi) __W,
+   (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
+{
+  return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
+   (__v16qi) __W,
+   (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
+{
+  return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
+   (__v16hi) __W,
+   (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
+{
+  return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
+   (__v32qi) __W,
+   (__mmask32) __U);
+}
+
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtepi16_epi8 (__m256i __A)
@@ -1442,42 +1478,6 @@ _mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, __m128i 
__B,
(__mmask8) __U);
 }
 
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
-{
-  return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
-   (__v8hi) __W,
-   (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
-{
-  return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
-   (__v16qi) __W,
-   (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
-{
-  return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
-   (__v16hi) __W,
-   (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
-{
-  return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
-   (__v32qi) __W,
-   (__mmask32) __U);
-}
-
 extern __inline __mmask8
 

[PATCH 2/2] i386: Add AVX512BW dependency to AVX512VBMI2

2023-04-18 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* common/config/i386/i386-common.cc
(OPTION_MASK_ISA_AVX512VBMI2_SET): Change OPTION_MASK_ISA_AVX512F_SET
to OPTION_MASK_ISA_AVX512BW_SET.
(OPTION_MASK_ISA_AVX512F_UNSET):
Remove OPTION_MASK_ISA_AVX512VBMI2_UNSET.
(OPTION_MASK_ISA_AVX512BW_UNSET):
Add OPTION_MASK_ISA_AVX512VBMI2_UNSET.
* config/i386/avx512vbmi2intrin.h: Do not push avx512bw.
* config/i386/avx512vbmi2vlintrin.h: Ditto.
* config/i386/i386-builtin.def: Remove OPTION_MASK_ISA_AVX512BW.
* config/i386/sse.md (VI12_AVX512VLBW): Removed.
(VI12_VI48F_AVX512VLBW): Rename to VI12_VI48F_AVX512VL.
(compress_mask): Change iterator from VI12_AVX512VLBW to
VI12_AVX512VL.
(compressstore_mask): Ditto.
(expand_mask): Ditto.
(expand_maskz): Ditto.
(*expand_mask): Change iterator from VI12_VI48F_AVX512VLBW to
VI12_VI48F_AVX512VL.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512bw-pr100267-1.c: Remove avx512f and avx512bw.
* gcc.target/i386/avx512bw-pr100267-b-2.c: Ditto.
* gcc.target/i386/avx512bw-pr100267-d-2.c: Ditto.
* gcc.target/i386/avx512bw-pr100267-q-2.c: Ditto.
* gcc.target/i386/avx512bw-pr100267-w-2.c: Ditto.
* gcc.target/i386/avx512f-vpcompressb-1.c: Ditto.
* gcc.target/i386/avx512f-vpcompressb-2.c: Ditto.
* gcc.target/i386/avx512f-vpcompressw-1.c: Ditto.
* gcc.target/i386/avx512f-vpcompressw-2.c: Ditto.
* gcc.target/i386/avx512f-vpexpandb-1.c: Ditto.
* gcc.target/i386/avx512f-vpexpandb-2.c: Ditto.
* gcc.target/i386/avx512f-vpexpandw-1.c: Ditto.
* gcc.target/i386/avx512f-vpexpandw-2.c: Ditto.
* gcc.target/i386/avx512f-vpshld-1.c: Ditto.
* gcc.target/i386/avx512f-vpshldd-2.c: Ditto.
* gcc.target/i386/avx512f-vpshldq-2.c: Ditto.
* gcc.target/i386/avx512f-vpshldv-1.c: Ditto.
* gcc.target/i386/avx512f-vpshldvd-2.c: Ditto.
* gcc.target/i386/avx512f-vpshldvq-2.c: Ditto.
* gcc.target/i386/avx512f-vpshldvw-2.c: Ditto.
* gcc.target/i386/avx512f-vpshrdd-2.c: Ditto.
* gcc.target/i386/avx512f-vpshrdq-2.c: Ditto.
* gcc.target/i386/avx512f-vpshrdv-1.c: Ditto.
* gcc.target/i386/avx512f-vpshrdvd-2.c: Ditto.
* gcc.target/i386/avx512f-vpshrdvq-2.c: Ditto.
* gcc.target/i386/avx512f-vpshrdvw-2.c: Ditto.
* gcc.target/i386/avx512f-vpshrdw-2.c: Ditto.
* gcc.target/i386/avx512vbmi2-vpshld-1.c: Ditto.
* gcc.target/i386/avx512vbmi2-vpshrd-1.c: Ditto.
* gcc.target/i386/avx512vl-vpcompressb-1.c: Ditto.
* gcc.target/i386/avx512vl-vpcompressb-2.c: Ditto.
* gcc.target/i386/avx512vl-vpcompressw-2.c: Ditto.
* gcc.target/i386/avx512vl-vpexpandb-1.c: Ditto.
* gcc.target/i386/avx512vl-vpexpandb-2.c: Ditto.
* gcc.target/i386/avx512vl-vpexpandw-1.c: Ditto.
* gcc.target/i386/avx512vl-vpexpandw-2.c: Ditto.
* gcc.target/i386/avx512vl-vpshldd-2.c: Ditto.
* gcc.target/i386/avx512vl-vpshldq-2.c: Ditto.
* gcc.target/i386/avx512vl-vpshldv-1.c: Ditto.
* gcc.target/i386/avx512vl-vpshldvd-2.c: Ditto.
* gcc.target/i386/avx512vl-vpshldvq-2.c: Ditto.
* gcc.target/i386/avx512vl-vpshldvw-2.c: Ditto.
* gcc.target/i386/avx512vl-vpshrdd-2.c: Ditto.
* gcc.target/i386/avx512vl-vpshrdq-2.c: Ditto.
* gcc.target/i386/avx512vl-vpshrdv-1.c: Ditto.
* gcc.target/i386/avx512vl-vpshrdvd-2.c: Ditto.
* gcc.target/i386/avx512vl-vpshrdvq-2.c: Ditto.
* gcc.target/i386/avx512vl-vpshrdvw-2.c: Ditto.
* gcc.target/i386/avx512vl-vpshrdw-2.c: Ditto.
* gcc.target/i386/avx512vlbw-pr100267-1.c: Ditto.
* gcc.target/i386/avx512vlbw-pr100267-b-2.c: Ditto.
* gcc.target/i386/avx512vlbw-pr100267-w-2.c: Ditto.
---
 gcc/common/config/i386/i386-common.cc |  5 +-
 gcc/config/i386/avx512vbmi2intrin.h   | 18 ++-
 gcc/config/i386/avx512vbmi2vlintrin.h | 21 ++--
 gcc/config/i386/i386-builtin.def  | 48 -
 gcc/config/i386/sse.md| 51 ---
 .../gcc.target/i386/avx512bw-pr100267-1.c |  2 +-
 .../gcc.target/i386/avx512bw-pr100267-b-2.c   |  3 +-
 .../gcc.target/i386/avx512bw-pr100267-d-2.c   |  3 +-
 .../gcc.target/i386/avx512bw-pr100267-q-2.c   |  3 +-
 .../gcc.target/i386/avx512bw-pr100267-w-2.c   |  3 +-
 .../gcc.target/i386/avx512f-vpcompressb-1.c   |  2 +-
 .../gcc.target/i386/avx512f-vpcompressb-2.c   |  3 +-
 .../gcc.target/i386/avx512f-vpcompressw-1.c   |  2 +-
 .../gcc.target/i386/avx512f-vpcompressw-2.c   |  3 +-
 .../gcc.target/i386/avx512f-vpexpandb-1.c |  2 +-
 .../gcc.target/i386/avx512f-vpexpandb-2.c |  3 +-
 .../gcc.target/i386/avx512f-vpexpandw-1.c |  2 +-
 .../gcc.target/i386/avx512f-vpexpandw-2.c |  3 +-
 

[PATCH 1/2] i386: Add AVX512BW dependency to AVX512BITALG

2023-04-18 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* common/config/i386/i386-common.cc
(OPTION_MASK_ISA_AVX512BITALG_SET):
Change OPTION_MASK_ISA_AVX512F_SET
to OPTION_MASK_ISA_AVX512BW_SET.
(OPTION_MASK_ISA_AVX512F_UNSET):
Remove OPTION_MASK_ISA_AVX512BITALG_UNSET.
(OPTION_MASK_ISA_AVX512BW_UNSET):
Add OPTION_MASK_ISA_AVX512BITALG_UNSET.
* config/i386/avx512bitalgintrin.h: Do not push avx512bw.
* config/i386/i386-builtin.def:
Remove redundant OPTION_MASK_ISA_AVX512BW.
* config/i386/sse.md (VI1_AVX512VLBW): Removed.
(avx512vl_vpshufbitqmb):
Change the iterator from VI1_AVX512VLBW to VI1_AVX512VL.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512bitalg-vpopcntb-1.c:
Remove avx512bw.
* gcc.target/i386/avx512bitalg-vpopcntb.c: Ditto.
* gcc.target/i386/avx512bitalg-vpopcntbvl.c: Ditto.
* gcc.target/i386/avx512bitalg-vpopcntw-1.c: Ditto.
* gcc.target/i386/avx512bitalg-vpopcntw.c: Ditto.
* gcc.target/i386/avx512bitalg-vpopcntwvl.c: Ditto.
* gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c: Ditto.
* gcc.target/i386/avx512bitalg-vpshufbitqmb.c: Ditto.
* gcc.target/i386/avx512bitalgvl-vpopcntb-1.c: Ditto.
* gcc.target/i386/avx512bitalgvl-vpopcntw-1.c: Ditto.
* gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c: Ditto.
* gcc.target/i386/pr93696-1.c: Ditto.
* gcc.target/i386/pr93696-2.c: Ditto.
---
 gcc/common/config/i386/i386-common.cc |  8 ++--
 gcc/config/i386/avx512bitalgintrin.h  | 39 ---
 gcc/config/i386/i386-builtin.def  | 10 ++---
 gcc/config/i386/sse.md|  8 +---
 .../gcc.target/i386/avx512bitalg-vpopcntb-1.c |  3 +-
 .../gcc.target/i386/avx512bitalg-vpopcntb.c   |  2 +-
 .../gcc.target/i386/avx512bitalg-vpopcntbvl.c |  2 +-
 .../gcc.target/i386/avx512bitalg-vpopcntw-1.c |  3 +-
 .../gcc.target/i386/avx512bitalg-vpopcntw.c   |  2 +-
 .../gcc.target/i386/avx512bitalg-vpopcntwvl.c |  2 +-
 .../i386/avx512bitalg-vpshufbitqmb-1.c|  2 +-
 .../i386/avx512bitalg-vpshufbitqmb.c  |  2 +-
 .../i386/avx512bitalgvl-vpopcntb-1.c  |  3 +-
 .../i386/avx512bitalgvl-vpopcntw-1.c  |  3 +-
 .../i386/avx512bitalgvl-vpshufbitqmb-1.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pr93696-1.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr93696-2.c |  2 +-
 17 files changed, 32 insertions(+), 63 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index d90c558311b..f78fc0a60e2 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -91,7 +91,7 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_AVX512VPOPCNTDQ_SET \
   (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512F_SET)
 #define OPTION_MASK_ISA_AVX512BITALG_SET \
-  (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512F_SET)
+  (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512BW_SET)
 #define OPTION_MASK_ISA2_AVX512BF16_SET OPTION_MASK_ISA2_AVX512BF16
 #define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM
 #define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW
@@ -234,14 +234,14 @@ along with GCC; see the file COPYING3.  If not see
| OPTION_MASK_ISA_AVX512VL_UNSET | OPTION_MASK_ISA_AVX512IFMA_UNSET \
| OPTION_MASK_ISA_AVX512VBMI2_UNSET \
| OPTION_MASK_ISA_AVX512VNNI_UNSET \
-   | OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET \
-   | OPTION_MASK_ISA_AVX512BITALG_UNSET)
+   | OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET)
 #define OPTION_MASK_ISA_AVX512CD_UNSET OPTION_MASK_ISA_AVX512CD
 #define OPTION_MASK_ISA_AVX512PF_UNSET OPTION_MASK_ISA_AVX512PF
 #define OPTION_MASK_ISA_AVX512ER_UNSET OPTION_MASK_ISA_AVX512ER
 #define OPTION_MASK_ISA_AVX512DQ_UNSET OPTION_MASK_ISA_AVX512DQ
 #define OPTION_MASK_ISA_AVX512BW_UNSET \
-  (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VBMI_UNSET)
+  (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VBMI_UNSET \
+   | OPTION_MASK_ISA_AVX512BITALG_UNSET)
 #define OPTION_MASK_ISA_AVX512VL_UNSET OPTION_MASK_ISA_AVX512VL
 #define OPTION_MASK_ISA_AVX512IFMA_UNSET OPTION_MASK_ISA_AVX512IFMA
 #define OPTION_MASK_ISA2_AVXIFMA_UNSET OPTION_MASK_ISA2_AVXIFMA
diff --git a/gcc/config/i386/avx512bitalgintrin.h 
b/gcc/config/i386/avx512bitalgintrin.h
index aa6d652938a..a1c7be109a9 100644
--- a/gcc/config/i386/avx512bitalgintrin.h
+++ b/gcc/config/i386/avx512bitalgintrin.h
@@ -48,17 +48,6 @@ _mm512_popcnt_epi16 (__m512i __A)
   return (__m512i) __builtin_ia32_vpopcountw_v32hi ((__v32hi) __A);
 }
 
-#ifdef __DISABLE_AVX512BITALG__
-#undef __DISABLE_AVX512BITALG__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512BITALG__ */
-
-#if !defined(__AVX512BITALG__) || !defined(__AVX512BW__)
-#pragma GCC push_options
-#pragma GCC target("avx512bitalg,avx512bw")
-#define __DISABLE_AVX512BITALGBW__
-#endif /* __AVX512VLBW__ */
-
 extern __inline __m512i
 

[PATCH 0/2] i386: Add missing AVX512BW dependency for ISAs using 32/64 bit mask

2023-04-18 Thread Haochen Jiang via Gcc-patches
Hi all,

32/64 bit masks were introduced in AVX512BW. Therefore, when we are using them,
we should imply AVX512BW.

The two patches added the dependency and removed the redundant AVX512BW usage
for AVX512BITALG and AVX512VBMI2.

Tested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen




[PATCH] i386: Use macro to wrap up share builtin exceptions in builtin isa check

2023-04-18 Thread Haochen Jiang via Gcc-patches
Hi all,

Currently in i386, we have several ISAs that share builtins with each other,
which is handled in ix86_check_builtin_isa_match with if condition clauses.

The patterns for these clauses are quite similar, so it will be more friendly
for developers if we rewrite them as a macro.

This patch adds that macro. Tested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* config/i386/i386-expand.cc
(ix86_check_builtin_isa_match): Correct wrong comments.
Add a new macro SHARE_BUILTIN and refactor the current if
clauses to macro.
---
 gcc/config/i386/i386-expand.cc | 72 --
 1 file changed, 24 insertions(+), 48 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 0d817fc3f3b..54d5dfae677 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -12588,6 +12588,7 @@ ix86_check_builtin_isa_match (unsigned int fcode,
   HOST_WIDE_INT isa2 = ix86_isa_flags2;
   HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa;
   HOST_WIDE_INT bisa2 = ix86_builtins_isa[fcode].isa2;
+  HOST_WIDE_INT tmp_isa = isa, tmp_isa2 = isa2;
   /* The general case is we require all the ISAs specified in bisa{,2}
  to be enabled.
  The exceptions are:
@@ -12596,60 +12597,35 @@ ix86_check_builtin_isa_match (unsigned int fcode,
  OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4
  (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL) or
OPTION_MASK_ISA2_AVXVNNI
- (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512IFMA) or
+ (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL) or
OPTION_MASK_ISA2_AVXIFMA
- (OPTION_MASK_ISA_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16) or
+ (OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA2_AVX512BF16) or
OPTION_MASK_ISA2_AVXNECONVERT
  where for each such pair it is sufficient if either of the ISAs is
  enabled, plus if it is ored with other options also those others.
  OPTION_MASK_ISA_MMX in bisa is satisfied also if TARGET_MMX_WITH_SSE.  */
-  if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
-   == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
-  && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) != 0)
-isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A);
 
-  if (((bisa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32))
-   == (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32))
-  && (isa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) != 0)
-isa |= (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32);
-
-  if (((bisa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
-   == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
-  && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
-isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
-
-  if bisa & (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL))
-   == (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL))
-   || (bisa2 & OPTION_MASK_ISA2_AVXVNNI) != 0)
-  && (((isa & (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL))
-  == (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL))
- || (isa2 & OPTION_MASK_ISA2_AVXVNNI) != 0))
-{
-  isa |= OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL;
-  isa2 |= OPTION_MASK_ISA2_AVXVNNI;
-}
-
-  if bisa & (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL))
-   == (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL))
-   || (bisa2 & OPTION_MASK_ISA2_AVXIFMA) != 0)
-  && (((isa & (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL))
-  == (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL))
- || (isa2 & OPTION_MASK_ISA2_AVXIFMA) != 0))
-{
-  isa |= OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL;
-  isa2 |= OPTION_MASK_ISA2_AVXIFMA;
-}
-
-  if bisa & OPTION_MASK_ISA_AVX512VL) != 0
-&& (bisa2 & OPTION_MASK_ISA2_AVX512BF16) != 0)
-   && (bisa2 & OPTION_MASK_ISA2_AVXNECONVERT) != 0)
-   && (((isa & OPTION_MASK_ISA_AVX512VL) != 0
-   && (isa2 & OPTION_MASK_ISA2_AVX512BF16) != 0)
-  || (isa2 & OPTION_MASK_ISA2_AVXNECONVERT) != 0))
-{
-  isa |= OPTION_MASK_ISA_AVX512VL;
-  isa2 |= OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16;
-}
+#define SHARE_BUILTIN(A1, A2, B1, B2) \
+  if bisa & (A1)) == (A1) && (bisa2 & (A2)) == (A2)) \
+   && ((bisa & (B1)) == (B1) && (bisa2 & (B2)) == (B2))) \
+  && (((isa & (A1)) == (A1) && (isa2 & (A2)) == (A2)) \
+ || ((isa & (B1)) == (B1) && (isa2 & (B2)) == (B2 \
+{ \
+  tmp_isa |= (A1) | (B1); \
+  tmp_isa2 |= (A2) | (B2); \
+}
+
+  SHARE_BUILTIN (OPTION_MASK_ISA_SSE, 0, OPTION_MASK_ISA_3DNOW_A, 0);
+  SHARE_BUILTIN (OPTION_MASK_ISA_SSE4_2, 0, OPTION_MASK_ISA_CRC32, 0);
+  SHARE_BUILTIN (OPTION_MASK_ISA_FMA, 0, OPTION_MASK_ISA_FMA4, 0);
+  SHARE_BUILTIN (OPTION_MASK_ISA_AVX512VNNI | 

[PATCH] gcc-13: Mention Intel AMX-COMPLEX ISA support and revise march support

2023-04-09 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch mentions Intel AMX-COMPLEX ISA support in GCC 13.

Also it revises the march support according to newly released
Intel Architecture Instruction Set Extensions and Future Features.

Ok for trunk?

BRs,
Haochen

---
 htdocs/gcc-13/changes.html | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index 71cb335d..84207104 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -520,6 +520,10 @@ a work-in-progress.
   RAO-INT intrinsics are available via the -mraoint
   compiler switch.
   
+  New ISA extension support for Intel AMX-COMPLEX was added.
+  AMX-COMPLEX intrinsics are available via the -mamx-complex
+  compiler switch.
+  
   GCC now supports the Intel CPU named Raptor Lake through
 -march=raptorlake.
 Raptor Lake is based on Alder Lake.
@@ -538,9 +542,13 @@ a work-in-progress.
 The switch enables the AVX-IFMA, AVX-VNNI-INT8, AVX-NE-CONVERT, CMPccXADD
 and RAO-INT ISA extensions.
   
+  GCC now supports the Intel CPU named Emerald Rapids through
+-march=emeraldrapids.
+Emerald Rapids is based on Sapphire Rapids.
+  
   GCC now supports the Intel CPU named Granite Rapids through
 -march=graniterapids.
-The switch enables the AMX-FP16 and PREFETCHI ISA extensions.
+The switch enables the AMX-FP16, PREFETCHI and AMX-COMPLEX ISA extensions.
   
   GCC now supports AMD CPUs based on the znver4 core
 via -march=znver4.  The switch makes GCC consider
-- 
2.31.1



[PATCH 1/2] Support Intel AMX-COMPLEX

2023-04-03 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect AMX-COMPLEX.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AMX_COMPLEX_SET,
OPTION_MASK_ISA2_AMX_COMPLEX_UNSET): New.
(ix86_handle_option): Handle -mamx-complex.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_AMX_COMPLEX.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
amx-complex.
* config.gcc: Add amxcomplexintrin.h.
* config/i386/cpuid.h (bit_AMX_COMPLEX): New.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__AMX_COMPLEX__.
* config/i386/i386-isa.def (AMX_COMPLEX): Add DEF_PTA(AMX_COMPLEX).
* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
Handle amx-complex.
* config/i386/i386.opt: Add option -mamx-complex.
* config/i386/immintrin.h: Include amxcomplexintrin.h.
* doc/extend.texi: Document amx-complex.
* doc/invoke.texi: Document -mamx-complex.
* doc/sourcebuild.texi: Document target amx-complex.
* config/i386/amxcomplexintrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mamx-complex.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/amx-check.h: Add cpu check for AMX-COMPLEX.
* gcc.target/i386/amx-helper.h: Add amx-complex support.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-12.c: Add -mamx-complex.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add amx-complex.
* gcc.target/i386/sse-23.c: Ditto.
* lib/target-supports.exp (check_effective_target_amx_complex): New.
* gcc.target/i386/amxcomplex-asmatt-1.c: New test.
* gcc.target/i386/amxcomplex-asmintel-1.c: Ditto.
* gcc.target/i386/amxcomplex-cmmimfp16ps-2.c: Ditto.
* gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c: Ditto.
---
 gcc/common/config/i386/cpuinfo.h  |  2 +
 gcc/common/config/i386/i386-common.cc | 19 +-
 gcc/common/config/i386/i386-cpuinfo.h |  1 +
 gcc/common/config/i386/i386-isas.h|  2 +
 gcc/config.gcc|  2 +-
 gcc/config/i386/amxcomplexintrin.h| 59 +++
 gcc/config/i386/cpuid.h   |  1 +
 gcc/config/i386/i386-c.cc |  2 +
 gcc/config/i386/i386-isa.def  |  1 +
 gcc/config/i386/i386-options.cc   |  4 +-
 gcc/config/i386/i386.opt  |  4 ++
 gcc/config/i386/immintrin.h   |  2 +
 gcc/doc/extend.texi   |  5 ++
 gcc/doc/invoke.texi   | 11 ++--
 gcc/doc/sourcebuild.texi  |  3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |  2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |  2 +-
 gcc/testsuite/gcc.target/i386/amx-check.h |  3 +
 gcc/testsuite/gcc.target/i386/amx-helper.h|  4 +-
 .../gcc.target/i386/amxcomplex-asmatt-1.c | 15 +
 .../gcc.target/i386/amxcomplex-asmintel-1.c   | 12 
 .../i386/amxcomplex-cmmimfp16ps-2.c   | 53 +
 .../i386/amxcomplex-cmmrlfp16ps-2.c   | 53 +
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
 gcc/testsuite/gcc.target/i386/sse-12.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c|  4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|  2 +-
 gcc/testsuite/lib/target-supports.exp | 11 
 30 files changed, 270 insertions(+), 17 deletions(-)
 create mode 100644 gcc/config/i386/amxcomplexintrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 5bde0cddb24..61559ed9de2 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -879,6 +879,8 @@ get_available_features (struct __processor_model *cpu_model,
{
  if (eax & bit_AMX_FP16)
set_feature (FEATURE_AMX_FP16);
+ if (edx & bit_AMX_COMPLEX)
+   set_feature (FEATURE_AMX_COMPLEX);
}
 }
 
diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index 0181e06b1c5..d90c558311b 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -117,6 +117,8 @@ along with GCC; see the file COPYING3.  If not see
   (OPTION_MASK_ISA2_AMX_TILE | 

[PATCH 2/2] i386: Add AMX-COMPLEX to Granite Rapids

2023-04-03 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* config/i386/i386.h (PTA_GRANITERAPIDS): Add PTA_AMX_COMPLEX.
---
 gcc/config/i386/i386.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index dd9391c492b..1da6dce8e0b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2361,7 +2361,7 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | 
PTA_ADX | PTA_AVX
 constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA
   | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD;
 constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | 
PTA_AMX_FP16
-  | PTA_PREFETCHI;
+  | PTA_PREFETCHI | PTA_AMX_COMPLEX;
 constexpr wide_int_bitmask PTA_GRANDRIDGE = PTA_SIERRAFOREST | PTA_RAOINT;
 constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
   | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
-- 
2.31.1



[PATCH 0/2] Support Intel AMX-COMPLEX

2023-04-03 Thread Haochen Jiang via Gcc-patches
Hi all,

These patches aim to add Intel AMX-COMPLEX instructions. Also, we added
AMX-COMPLEX to -march=graniterapids.

The information is based on newly released
Intel Architecture Instruction Set Extensions and Future Features.

The document is available at the following link:
https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Since there are only two instructions under this new ISA, I suppose the
risk is low and the patches might get a chance for GCC 13. So I am sending the patches out now.

Tested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen




[PATCH] Fix intrin name in Intel CMPccXADD

2022-12-13 Thread Haochen Jiang via Gcc-patches
Hi all,

We usually use only one "_", not two "__", as the prefix in intrin names.

This patch aims to fix the intrin name for CMPccXADD.

Bootstrapped and regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* config/i386/cmpccxaddintrin.h
(__cmpccxadd_epi32): Rename to _cmpccxadd_epi32.
(__cmpccxadd_epi64): Rename to _cmpccxadd_epi64.

gcc/testsuite/ChangeLog:

* gcc.target/i386/cmpccxadd-1.c: Fix intrin name.
* gcc.target/i386/cmpccxadd-2.c: Ditto.
---
 gcc/config/i386/cmpccxaddintrin.h   |  8 +--
 gcc/testsuite/gcc.target/i386/cmpccxadd-1.c | 64 ++---
 gcc/testsuite/gcc.target/i386/cmpccxadd-2.c | 64 ++---
 3 files changed, 68 insertions(+), 68 deletions(-)

diff --git a/gcc/config/i386/cmpccxaddintrin.h 
b/gcc/config/i386/cmpccxaddintrin.h
index 1afa03bd08a..11fce1f5e50 100644
--- a/gcc/config/i386/cmpccxaddintrin.h
+++ b/gcc/config/i386/cmpccxaddintrin.h
@@ -58,23 +58,23 @@ typedef enum {
 #ifdef __OPTIMIZE__
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__cmpccxadd_epi32 (int *__A, int __B, int __C, const _CMPCCX_ENUM __D)
+_cmpccxadd_epi32 (int *__A, int __B, int __C, const _CMPCCX_ENUM __D)
 {
   return __builtin_ia32_cmpccxadd (__A, __B, __C, __D);
 }
 
 extern __inline long long
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__cmpccxadd_epi64 (long long *__A, long long __B, long long __C,
+_cmpccxadd_epi64 (long long *__A, long long __B, long long __C,
   const _CMPCCX_ENUM __D)
 {
   return __builtin_ia32_cmpccxadd64 (__A, __B, __C, __D);
 }
 #else
-#define __cmpccxadd_epi32(A,B,C,D) \
+#define _cmpccxadd_epi32(A,B,C,D) \
   __builtin_ia32_cmpccxadd ((int *) (A), (int) (B), (int) (C), \
(_CMPCCX_ENUM) (D))
-#define __cmpccxadd_epi64(A,B,C,D) \
+#define _cmpccxadd_epi64(A,B,C,D) \
   __builtin_ia32_cmpccxadd64 ((long long *) (A), (long long) (B), \
  (long long) (C), (_CMPCCX_ENUM) (D))
 #endif
diff --git a/gcc/testsuite/gcc.target/i386/cmpccxadd-1.c 
b/gcc/testsuite/gcc.target/i386/cmpccxadd-1.c
index c825717e29e..537b79b8d2d 100644
--- a/gcc/testsuite/gcc.target/i386/cmpccxadd-1.c
+++ b/gcc/testsuite/gcc.target/i386/cmpccxadd-1.c
@@ -26,36 +26,36 @@ long long e, f;
 void extern
 cmpccxadd_test(void)
 {
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_O);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_O);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NO);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NO);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_B);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_B);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NB);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NB);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_Z);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_Z);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NZ);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NZ);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_BE);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_BE);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NBE);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NBE);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_S);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_S);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NS);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NS);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_P);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_P);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NP);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NP);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_L);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_L);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NL);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NL);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_LE);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_LE);
-  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NLE);
-  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NLE);
+  b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_O);
+  e = _cmpccxadd_epi64 (d, e, f, _CMPCCX_O);
+  b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_NO);
+  e = _cmpccxadd_epi64 (d, e, f, _CMPCCX_NO);
+  b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_B);
+  e = _cmpccxadd_epi64 (d, e, f, _CMPCCX_B);
+  b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_NB);
+  e = _cmpccxadd_epi64 (d, e, f, _CMPCCX_NB);
+  b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_Z);
+  e = _cmpccxadd_epi64 (d, e, f, _CMPCCX_Z);
+  b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_NZ);
+  e = _cmpccxadd_epi64 (d, e, f, _CMPCCX_NZ);
+  b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_BE);
+  e = _cmpccxadd_epi64 (d, e, f, _CMPCCX_BE);
+  b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_NBE);
+  e = _cmpccxadd_epi64 (d, e, f, _CMPCCX_NBE);
+  b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_S);
+  e = _cmpccxadd_epi64 (d, e, f, _CMPCCX_S);
+  b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_NS);
+  e = _cmpccxadd_epi64 (d, e, f, _CMPCCX_NS);
+  b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_P);
+  e = _cmpccxadd_epi64 (d, e, f, _CMPCCX_P);
+  b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_NP);
+  e = 

[PATCH] i386: Add AMX-TILE dependency for AMX related ISAs

2022-11-10 Thread Haochen Jiang via Gcc-patches
Hi all,

All AMX-related ISAs depend on AMX-TILE; without it, even basic AMX
support is unavailable.

This patch adds those dependencies. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AMX_INT8_SET): Add AMX-TILE dependency.
(OPTION_MASK_ISA2_AMX_BF16_SET): Ditto.
(OPTION_MASK_ISA2_AMX_FP16_SET): Ditto.
(OPTION_MASK_ISA2_AMX_TILE_UNSET): Disable AMX_{INT8,
BF16, FP16} when disabling AMX_TILE.

gcc/testsuite/ChangeLog:

* gcc.target/i386/amxbf16-dpbf16ps-2.c: Remove -mamx-tile.
* gcc.target/i386/amxfp16-dpfp16ps-2.c: Ditto.
* gcc.target/i386/amxint8-dpbssd-2.c: Ditto.
* gcc.target/i386/amxint8-dpbsud-2.c: Ditto.
* gcc.target/i386/amxint8-dpbusd-2.c: Ditto.
* gcc.target/i386/amxint8-dpbuud-2.c: Ditto.
---
 gcc/common/config/i386/i386-common.cc  | 13 +
 gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c |  3 +--
 gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c |  3 +--
 gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c   |  3 +--
 gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c   |  3 +--
 gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c   |  3 +--
 gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c   |  3 +--
 7 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index 431fd0d3ad1..5e6d3da0306 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -106,12 +106,15 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_CLWB_SET OPTION_MASK_ISA_CLWB
 #define OPTION_MASK_ISA2_AVX512VP2INTERSECT_SET 
OPTION_MASK_ISA2_AVX512VP2INTERSECT
 #define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
-#define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
-#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
+#define OPTION_MASK_ISA2_AMX_INT8_SET \
+  (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_INT8)
+#define OPTION_MASK_ISA2_AMX_BF16_SET \
+  (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_BF16)
 #define OPTION_MASK_ISA2_AVXVNNIINT8_SET OPTION_MASK_ISA2_AVXVNNIINT8
 #define OPTION_MASK_ISA2_AVXNECONVERT_SET OPTION_MASK_ISA2_AVXNECONVERT
 #define OPTION_MASK_ISA2_CMPCCXADD_SET OPTION_MASK_ISA2_CMPCCXADD
-#define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
+#define OPTION_MASK_ISA2_AMX_FP16_SET \
+  (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_FP16)
 #define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI
 #define OPTION_MASK_ISA2_RAOINT_SET OPTION_MASK_ISA2_RAOINT
 
@@ -277,7 +280,9 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA2_SERIALIZE_UNSET OPTION_MASK_ISA2_SERIALIZE
 #define OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET 
OPTION_MASK_ISA2_AVX512VP2INTERSECT
 #define OPTION_MASK_ISA2_TSXLDTRK_UNSET OPTION_MASK_ISA2_TSXLDTRK
-#define OPTION_MASK_ISA2_AMX_TILE_UNSET OPTION_MASK_ISA2_AMX_TILE
+#define OPTION_MASK_ISA2_AMX_TILE_UNSET \
+  (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_INT8_UNSET \
+   | OPTION_MASK_ISA2_AMX_BF16_UNSET | OPTION_MASK_ISA2_AMX_FP16_UNSET)
 #define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8
 #define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
 #define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR
diff --git a/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c 
b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c
index b00bc13ec78..35881e7682a 100644
--- a/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c
+++ b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c
@@ -1,7 +1,6 @@
 /* { dg-do run { target { ! ia32 } } } */
-/* { dg-require-effective-target amx_tile } */
 /* { dg-require-effective-target amx_bf16 } */
-/* { dg-options "-O2 -mamx-tile -mamx-bf16" } */
+/* { dg-options "-O2 -mamx-bf16" } */
 #include 
 
 #define AMX_BF16
diff --git a/gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c 
b/gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c
index 2d359a689ea..a1fafbcbfeb 100644
--- a/gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c
+++ b/gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c
@@ -1,8 +1,7 @@
 /* { dg-do run { target { ! ia32 } } } */
-/* { dg-require-effective-target amx_tile } */
 /* { dg-require-effective-target amx_fp16 } */
 /* { dg-require-effective-target avx512fp16 } */
-/* { dg-options "-O2 -mamx-tile -mamx-fp16 -mavx512fp16" } */
+/* { dg-options "-O2 -mamx-fp16 -mavx512fp16" } */
 #define AMX_FP16
 #define DO_TEST test_amx_fp16_dpfp16ps
 void test_amx_fp16_dpfp16ps ();
diff --git a/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c 
b/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c
index 74ad71be5c5..d7efb3d20c2 100644
--- a/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c
+++ b/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c
@@ -1,7 +1,6 @@
 /* { dg-do run { target { ! ia32 } } } */
-/* { 

[wwwdocs] gcc-13: Mention Intel new ISA and march support.

2022-11-09 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch aims to mention newly added Intel ISA and march support.

Ok for trunk?

BRs,
Haochen

---
 htdocs/gcc-13/changes.html | 50 ++
 1 file changed, 50 insertions(+)

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index bd11cbec..0daf921b 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -240,6 +240,56 @@ a work-in-progress.
   __bf16 type to x86 psABI. Users need to adjust their
   AVX512BF16-related source code when upgrading GCC12 to GCC13.
   
+  New ISA extension support for Intel AVX-IFMA was added to GCC.
+  AVX-IFMA intrinsics are available via the -mavxifma
+  compiler switch.
+  
+  New ISA extension support for Intel AVX-VNNI-INT8 was added to GCC.
+  AVX-VNNI-INT8 intrinsics are available via the -mavxvnniint8
+  compiler switch.
+  
+  New ISA extension support for Intel AVX-NE-CONVERT was added to GCC.
+  AVX-NE-CONVERT intrinsics are available via the
+  -mavxneconvert compiler switch.
+  
+  New ISA extension support for Intel CMPccXADD was added to GCC.
+  CMPccXADD intrinsics are available via the -mcmpccxadd
+  compiler switch.
+  
+  New ISA extension support for Intel AMX-FP16 was added to GCC.
+  AMX-FP16 intrinsics are available via the -mamx-fp16
+  compiler switch.
+  
+  New ISA extension support for Intel PREFETCHI was added to GCC.
+  PREFETCHI intrinsics are available via the -mprefetchi
+  compiler switch.
+  
+  New ISA extension support for Intel RAO-INT was added to GCC.
+  RAO-INT intrinsics are available via the -mraoint
+  compiler switch.
+  
+  GCC now supports the Intel CPU named Raptor Lake through
+-march=raptorlake.
+Raptor Lake is based on Alder Lake.
+  
+  GCC now supports the Intel CPU named Meteor Lake through
+-march=meteorlake.
+Meteor Lake is based on Alder Lake.
+  
+  GCC now supports the Intel CPU named Sierra Forest through
+-march=sierraforest.
+The switch enables the AVX-IFMA, AVX-VNNI-INT8, AVX-NE-CONVERT and
+CMPccXADD ISA extensions.
+  
+  GCC now supports the Intel CPU named Grand Ridge through
+-march=grandridge.
+The switch enables the AVX-IFMA, AVX-VNNI-INT8, AVX-NE-CONVERT, CMPccXADD
+and RAO-INT ISA extensions.
+  
+  GCC now supports the Intel CPU named Granite Rapids through
+-march=graniterapids.
+The switch enables the AMX-FP16 and PREFETCHI ISA extensions.
+  
 
 
 
-- 
2.18.1



[PATCH] i386: Add ISA check for newly introduced prefetch builtins.

2022-11-08 Thread Haochen Jiang via Gcc-patches
Hi all,

As Hongtao said, the failure on pentiumpro is caused by a missing ISA check:
the new builtins call emit_insn () directly, which does not check whether
the TARGET matches. Previously, the middle-end builtin performed that
check.

pentiumpro does not support any prefetch instruction, so the expansion
fell through to the pattern and then failed.

I have added the same restrictions that the middle-end builtin_prefetch
applies. I also added the missing checks for PREFETCHI. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* config/i386/i386-builtin.def (BDESC): Add
OPTION_MASK_ISA2_PREFETCHI for prefetchi builtin.
* config/i386/i386-expand.cc (ix86_expand_builtin):
Add ISA check before emit_insn.
* config/i386/prfchiintrin.h: Add target for intrin.

gcc/testsuite/ChangeLog:

* gcc.target/i386/prefetchi-5.c: New test.
---
 gcc/config/i386/i386-builtin.def|  2 +-
 gcc/config/i386/i386-expand.cc  | 11 +--
 gcc/config/i386/prfchiintrin.h  | 14 +-
 gcc/testsuite/gcc.target/i386/prefetchi-5.c |  4 
 4 files changed, 27 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-5.c

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index ea3aff7f125..5e0461acc00 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -498,7 +498,7 @@ BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, 
"__builtin_ia32_aesencwide1
 BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, 
"__builtin_ia32_aesencwide256kl_u8", IX86_BUILTIN_AESENCWIDE256KLU8, UNKNOWN, 
(int) UINT8_FTYPE_PV2DI_PCV2DI_PCVOID)
 
 /* PREFETCHI */
-BDESC (0, 0, CODE_FOR_prefetchi, "__builtin_ia32_prefetchi", 
IX86_BUILTIN_PREFETCHI, UNKNOWN, (int) VOID_FTYPE_PCVOID_INT)
+BDESC (0, OPTION_MASK_ISA2_PREFETCHI, CODE_FOR_prefetchi, 
"__builtin_ia32_prefetchi", IX86_BUILTIN_PREFETCHI, UNKNOWN, (int) 
VOID_FTYPE_PCVOID_INT)
 BDESC (0, 0, CODE_FOR_nothing, "__builtin_ia32_prefetch", 
IX86_BUILTIN_PREFETCH, UNKNOWN, (int) VOID_FTYPE_PCVOID_INT_INT_INT)
 
 BDESC_END (SPECIAL_ARGS, PURE_ARGS)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 9c92b07d5cd..0e45c195390 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -13131,7 +13131,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
 
if (INTVAL (op3) == 1)
  {
-   if (TARGET_64BIT
+   if (TARGET_64BIT && TARGET_PREFETCHI
&& local_func_symbolic_operand (op0, GET_MODE (op0)))
  emit_insn (gen_prefetchi (op0, op2));
else
@@ -13150,7 +13150,14 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
op0 = convert_memory_address (Pmode, op0);
op0 = copy_addr_to_reg (op0);
  }
-   emit_insn (gen_prefetch (op0, op1, op2));
+
+   if (TARGET_3DNOW || TARGET_PREFETCH_SSE
+   || TARGET_PRFCHW || TARGET_PREFETCHWT1)
+ emit_insn (gen_prefetch (op0, op1, op2));
+   else if (!MEM_P (op0) && side_effects_p (op0))
+ /* Don't do anything with direct references to volatile memory,
+but generate code to handle other side effects.  */
+ emit_insn (op0);
  }
 
return 0;
diff --git a/gcc/config/i386/prfchiintrin.h b/gcc/config/i386/prfchiintrin.h
index 06deef488ba..996a4be1aba 100644
--- a/gcc/config/i386/prfchiintrin.h
+++ b/gcc/config/i386/prfchiintrin.h
@@ -30,6 +30,13 @@
 
 #ifdef __x86_64__
 
+
+#ifndef __PREFETCHI__
+#pragma GCC push_options
+#pragma GCC target("prefetchi")
+#define __DISABLE_PREFETCHI__
+#endif /* __PREFETCHI__ */
+
 extern __inline void
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _m_prefetchit0 (void* __P)
@@ -44,6 +51,11 @@ _m_prefetchit1 (void* __P)
   __builtin_ia32_prefetchi (__P, 2);
 }
 
-#endif
+#ifdef __DISABLE_PREFETCHI__
+#undef __DISABLE_PREFETCHI__
+#pragma GCC pop_options
+#endif /* __DISABLE_PREFETCHI__ */
+
+#endif /* __x86_64__ */
 
 #endif /* _PRFCHIINTRIN_H_INCLUDED */
diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-5.c 
b/gcc/testsuite/gcc.target/i386/prefetchi-5.c
new file mode 100644
index 000..8c26540f96a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/prefetchi-5.c
@@ -0,0 +1,4 @@
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O0 -march=pentiumpro" } */
+
+#include "prefetchi-4.c"
-- 
2.18.1



[PATCH 2/2] Add m_CORE_ATOM for atom cores

2022-11-06 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* config/i386/i386-options.cc (m_CORE_ATOM): New.
* config/i386/x86-tune.def
(X86_TUNE_SCHEDULE): Initial tune for CORE_ATOM.
(X86_TUNE_PARTIAL_REG_DEPENDENCY): Ditto.
(X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY): Ditto.
(X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): Ditto.
(X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Ditto.
(X86_TUNE_DEST_FALSE_DEP_FOR_GLC): Ditto.
(X86_TUNE_MEMORY_MISMATCH_STALL): Ditto.
(X86_TUNE_USE_LEAVE): Ditto.
(X86_TUNE_PUSH_MEMORY): Ditto.
(X86_TUNE_USE_INCDEC): Ditto.
(X86_TUNE_INTEGER_DFMODE_MOVES): Ditto.
(X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB): Ditto.
(X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES): Ditto.
(X86_TUNE_USE_SAHF): Ditto.
(X86_TUNE_USE_BT): Ditto.
(X86_TUNE_AVOID_FALSE_DEP_FOR_BMI): Ditto.
(X86_TUNE_ONE_IF_CONV_INSN): Ditto.
(X86_TUNE_AVOID_MFENCE): Ditto.
(X86_TUNE_USE_SIMODE_FIOP): Ditto.
(X86_TUNE_EXT_80387_CONSTANTS): Ditto.
(X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL): Ditto.
(X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL): Ditto.
(X86_TUNE_SSE_TYPELESS_STORES): Ditto.
(X86_TUNE_SSE_LOAD0_BY_PXOR): Ditto.
(X86_TUNE_AVOID_4BYTE_PREFIXES): Ditto.
(X86_TUNE_USE_GATHER_2PARTS): Ditto.
(X86_TUNE_USE_GATHER_4PARTS): Ditto.
(X86_TUNE_USE_GATHER): Ditto.
---
 gcc/config/i386/i386-options.cc |  1 +
 gcc/config/i386/x86-tune.def| 71 +++--
 2 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 23ab1f867d0..e5c77f3a84d 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -139,6 +139,7 @@ along with GCC; see the file COPYING3.  If not see
 #define m_TREMONT (HOST_WIDE_INT_1U<> (W-1) ^ x) -
@@ -372,7 +379,7 @@ DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
   ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL
| m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE
| m_LUJIAZUI | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT
-   | m_ALDERLAKE | m_GENERIC))
+   | m_ALDERLAKE | m_CORE_ATOM | m_GENERIC))
 
 /* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp.  */
 DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE | m_LUJIAZUI)
@@ -381,7 +388,8 @@ DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE 
| m_LUJIAZUI)
 DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
   m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
  | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_LUJIAZUI
- | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC)
+ | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_CORE_ATOM
+ | m_GENERIC)
 
 /*/
 /* SSE instruction selection tuning  */
@@ -397,14 +405,15 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, 
"general_regs_sse_spill",
 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
  m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
  | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE
- | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_LUJIAZUI | m_GENERIC)
+ | m_CORE_ATOM | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_LUJIAZUI
+ | m_GENERIC)
 
 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores
instead of a sequence loading registers by parts.  */
 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
  m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
  | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE
- | m_BDVER | m_ZNVER | m_LUJIAZUI | m_GENERIC)
+ | m_CORE_ATOM | m_BDVER | m_ZNVER | m_LUJIAZUI | m_GENERIC)
 
 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL: Use packed single
precision 128bit instructions instead of double where possible.   */
@@ -414,13 +423,13 @@ DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, 
"sse_packed_single_insn_optim
 /* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores.   */
 DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores",
  m_AMD_MULTIPLE | m_LUJIAZUI | m_CORE_ALL | m_TREMONT | m_ALDERLAKE
- | m_GENERIC)
+ | m_CORE_ATOM | m_GENERIC)
 
 /* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to
xorps/xorpd and other variants.  */
 DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor",
  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_ZNVER
- | m_LUJIAZUI | m_TREMONT | m_ALDERLAKE | m_GENERIC)
+ | m_LUJIAZUI | m_TREMONT | 

[PATCH 1/2] Initial Grand Ridge support

2022-11-06 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* common/config/i386/i386-common.cc
(processor_names): Add grandridge.
(processor_alias_table): Ditto.
* common/config/i386/i386-cpuinfo.h:
(enum processor_types): Add INTEL_GRANDRIDGE.
* config.gcc: Add -march=grandridge.
* config/i386/driver-i386.cc (host_detect_local_cpu):
Handle grandridge.
* config/i386/i386-c.cc (ix86_target_macros_internal):
Ditto.
* config/i386/i386-options.cc (m_GRANDRIDGE): New define.
(processor_cost_table): Add grandridge.
* config/i386/i386.h (enum processor_type):
Add PROCESSOR_GRANDRIDGE.
(PTA_GRANDRIDGE): Ditto.
* doc/extend.texi: Add grandridge.
* doc/invoke.texi: Ditto.

gcc/testsuite/ChangeLog:

* g++.target/i386/mv16.C: Add grandridge.
* gcc.target/i386/funcspec-56.inc: Handle new march.
---
 gcc/common/config/i386/cpuinfo.h  | 6 ++
 gcc/common/config/i386/i386-common.cc | 3 +++
 gcc/common/config/i386/i386-cpuinfo.h | 1 +
 gcc/config.gcc| 2 +-
 gcc/config/i386/driver-i386.cc| 5 -
 gcc/config/i386/i386-c.cc | 7 +++
 gcc/config/i386/i386-options.cc   | 2 ++
 gcc/config/i386/i386.h| 2 ++
 gcc/doc/extend.texi   | 3 +++
 gcc/doc/invoke.texi   | 9 +
 gcc/testsuite/g++.target/i386/mv16.C  | 6 ++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc | 1 +
 12 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index df3500adc83..4d1bcffb978 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -573,6 +573,12 @@ get_intel_cpu (struct __processor_model *cpu_model,
   cpu_model->__cpu_type = INTEL_COREI7;
   cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS;
   break;
+case 0xb6:
+  /* Grand Ridge.  */
+  cpu = "grandridge";
+  CHECK___builtin_cpu_is ("grandridge");
+  cpu_model->__cpu_type = INTEL_GRANDRIDGE;
+  break;
 case 0x17:
 case 0x1d:
   /* Penryn.  */
diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index 60a193a651c..431fd0d3ad1 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -1920,6 +1920,7 @@ const char *const processor_names[] =
   "goldmont-plus",
   "tremont",
   "sierraforest",
+  "grandridge",
   "knl",
   "knm",
   "skylake",
@@ -2071,6 +2072,8 @@ const pta processor_alias_table[] =
 M_CPU_TYPE (INTEL_TREMONT), P_PROC_SSE4_2},
   {"sierraforest", PROCESSOR_SIERRAFOREST, CPU_HASWELL, PTA_SIERRAFOREST,
 M_CPU_SUBTYPE (INTEL_SIERRAFOREST), P_PROC_AVX2},
+  {"grandridge", PROCESSOR_GRANDRIDGE, CPU_HASWELL, PTA_GRANDRIDGE,
+M_CPU_TYPE (INTEL_GRANDRIDGE), P_PROC_AVX2},
   {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL,
 M_CPU_TYPE (INTEL_KNL), P_PROC_AVX512F},
   {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM,
diff --git a/gcc/common/config/i386/i386-cpuinfo.h 
b/gcc/common/config/i386/i386-cpuinfo.h
index 345fda648ff..fe2e9e21fd2 100644
--- a/gcc/common/config/i386/i386-cpuinfo.h
+++ b/gcc/common/config/i386/i386-cpuinfo.h
@@ -61,6 +61,7 @@ enum processor_types
   AMDFAM19H,
   ZHAOXIN_FAM7H,
   INTEL_SIERRAFOREST,
+  INTEL_GRANDRIDGE,
   CPU_TYPE_MAX,
   BUILTIN_CPU_TYPE_MAX = CPU_TYPE_MAX
 };
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 84c040746dc..b5eda046033 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -669,7 +669,7 @@ silvermont knl knm skylake-avx512 cannonlake icelake-client 
icelake-server \
 skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
 sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 
\
 nano-x2 eden-x4 nano-x4 lujiazui x86-64 x86-64-v2 x86-64-v3 x86-64-v4 \
-sierraforest graniterapids native"
+sierraforest graniterapids grandridge native"
 
 # Additional x86 processors supported by --with-cpu=.  Each processor
 # MUST be separated by exactly one space.
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index 3117d83de00..95c16c23c7f 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -591,8 +591,11 @@ const char *host_detect_local_cpu (int argc, const char 
**argv)
  /* This is unknown family 0x6 CPU.  */
  if (has_feature (FEATURE_AVX))
{
+ /* Assume Grand Ridge.  */
+ if (has_feature (FEATURE_RAOINT))
+   cpu = "grandridge";
  /* Assume Granite Rapids.  */
- if (has_feature (FEATURE_AMX_FP16))
+ else if (has_feature (FEATURE_AMX_FP16))
cpu = "graniterapids";
  /* Assume Sierra Forest.  */
  else if 

[PATCH 0/2] Intel Grand Ridge Support

2022-11-06 Thread Haochen Jiang via Gcc-patches
Hi all,

These patches aim to add initial Grand Ridge support for GCC.
We also add a new m_CORE_ATOM for future Atom core tuning. They need
to be checked in after the RAO-INT patch.

The information for Grand Ridge comes from the following:
https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen




[PATCH] Initial Granite Rapids support

2022-11-04 Thread Haochen Jiang via Gcc-patches
From: "Hu, Lin1" 

Hi all,

This patch aims to add initial Granite Rapids support for GCC.
It needs to be checked in after the prefetchit0/t1 patch.

The information for Granite Rapids comes from the following:
https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* common/config/i386/cpuinfo.h:
(get_intel_cpu): Handle Granite Rapids.
* common/config/i386/i386-common.cc:
(processor_names): Add graniterapids.
(processor_alias_table): Ditto.
* common/config/i386/i386-cpuinfo.h:
(enum processor_types): Add INTEL_GRANITERAPIDS.
* config.gcc: Add -march=graniterapids.
* config/i386/driver-i386.cc (host_detect_local_cpu):
Handle graniterapids.
* config/i386/i386-c.cc (ix86_target_macros_internal):
Ditto.
* config/i386/i386-options.cc (m_GRANITERAPIDS): New define.
(processor_cost_table): Add graniterapids.
* config/i386/i386.h (enum processor_type):
Add PROCESSOR_GRANITERAPIDS.
(PTA_GRANITERAPIDS): Ditto.
* doc/extend.texi: Add graniterapids.
* doc/invoke.texi: Ditto.

gcc/testsuite/ChangeLog:

* g++.target/i386/mv16.C: Add graniterapids.
* gcc.target/i386/funcspec-56.inc: Handle new march.
---
 gcc/common/config/i386/cpuinfo.h  |  9 +
 gcc/common/config/i386/i386-common.cc |  3 +++
 gcc/common/config/i386/i386-cpuinfo.h |  1 +
 gcc/config.gcc|  2 +-
 gcc/config/i386/driver-i386.cc|  5 -
 gcc/config/i386/i386-c.cc |  7 +++
 gcc/config/i386/i386-options.cc   |  4 +++-
 gcc/config/i386/i386.h|  3 +++
 gcc/doc/extend.texi   |  3 +++
 gcc/doc/invoke.texi   | 11 +++
 gcc/testsuite/g++.target/i386/mv16.C  |  6 ++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |  1 +
 12 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index ac7761699af..42c25b8a636 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -564,6 +564,15 @@ get_intel_cpu (struct __processor_model *cpu_model,
   CHECK___builtin_cpu_is ("sierraforest");
   cpu_model->__cpu_type = INTEL_SIERRAFOREST;
   break;
+case 0xad:
+case 0xae:
+  /* Granite Rapids.  */
+  cpu = "graniterapids";
+  CHECK___builtin_cpu_is ("corei7");
+  CHECK___builtin_cpu_is ("graniterapids");
+  cpu_model->__cpu_type = INTEL_COREI7;
+  cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS;
+  break;
 case 0x17:
 case 0x1d:
   /* Penryn.  */
diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index 9bcae020a00..c828ae5b7d7 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -1918,6 +1918,7 @@ const char *const processor_names[] =
   "sapphirerapids",
   "alderlake",
   "rocketlake",
+  "graniterapids",
   "intel",
   "lujiazui",
   "geode",
@@ -2037,6 +2038,8 @@ const pta processor_alias_table[] =
 M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
   {"meteorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
 M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+  {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
+M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
 M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
diff --git a/gcc/common/config/i386/i386-cpuinfo.h 
b/gcc/common/config/i386/i386-cpuinfo.h
index 68eda7a8696..c06f089b0c5 100644
--- a/gcc/common/config/i386/i386-cpuinfo.h
+++ b/gcc/common/config/i386/i386-cpuinfo.h
@@ -96,6 +96,7 @@ enum processor_subtypes
   INTEL_COREI7_ROCKETLAKE,
   ZHAOXIN_FAM7H_LUJIAZUI,
   AMDFAM19H_ZNVER4,
+  INTEL_COREI7_GRANITERAPIDS,
   CPU_SUBTYPE_MAX
 };
 
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 5c782b2f298..03c1523f7af 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -668,7 +668,7 @@ silvermont knl knm skylake-avx512 cannonlake icelake-client 
icelake-server \
 skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
 sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 
\
 nano-x2 eden-x4 nano-x4 lujiazui x86-64 x86-64-v2 x86-64-v3 x86-64-v4 \
-sierraforest native"
+sierraforest graniterapids native"
 
 # Additional x86 processors supported by --with-cpu=.  Each processor
 # MUST be separated by exactly one space.
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index a265b1c39f9..3117d83de00 100644
--- 

[PATCH] Support Intel prefetchit0/t1

2022-11-04 Thread Haochen Jiang via Gcc-patches
Hi all,

We are withdrawing the patches that added a new parameter to the original
builtin_prefetch and implemented instruction prefetch on top of it.

Since instruction prefetch will only be supported on a specific backend,
we also consider that there is no need to add a new rtl for it.

This patch only supports instruction prefetch for the x86 backend.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect PREFETCHI.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_PREFETCHI_SET,
OPTION_MASK_ISA2_PREFETCHI_UNSET): New.
(ix86_handle_option): Handle -mprefetchi.
* common/config/i386/i386-cpuinfo.h
(enum processor_features): Add FEATURE_PREFETCHI.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY
for prefetchi.
* config.gcc: Add prfchiintrin.h.
* config/i386/cpuid.h (bit_PREFETCHI): New.
* config/i386/i386-builtin-types.def:
Add DEF_FUNCTION_TYPE (VOID, PCVOID, INT)
and DEF_FUNCTION_TYPE (VOID, PCVOID, INT, INT, INT).
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal):
Define __PREFETCHI__.
* config/i386/i386-expand.cc: Handle new builtins.
* config/i386/i386-isa.def (PREFETCHI):
Add DEF_PTA(PREFETCHI).
* config/i386/i386-options.cc
(ix86_valid_target_attribute_inner_p): Handle prefetchi.
* config/i386/i386.md (prefetchi): New define_insn.
* config/i386/i386.opt: Add option -mprefetchi.
* config/i386/predicates.md (local_func_symbolic_operand):
New predicates.
* config/i386/x86gprintrin.h: Include prfchiintrin.h.
* config/i386/xmmintrin.h (enum _mm_hint): New enum for
prefetchi.
(_mm_prefetch): Handle the highest bit of enum.
* doc/extend.texi: Document prefetchi.
* doc/invoke.texi: Document -mprefetchi.
* doc/sourcebuild.texi: Document target prefetchi.
* config/i386/prfchiintrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mprefetchi.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/avx-1.c: Ditto.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-13.c: Add -mprefetchi.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/x86gprintrin-1.c: Ditto.
* gcc.target/i386/x86gprintrin-2.c: Ditto.
* gcc.target/i386/x86gprintrin-3.c: Ditto.
* gcc.target/i386/x86gprintrin-4.c: Ditto.
* gcc.target/i386/x86gprintrin-5.c: Ditto.
* gcc.target/i386/prefetchi-1.c: New test.
* gcc.target/i386/prefetchi-2.c: Ditto.
* gcc.target/i386/prefetchi-3.c: Ditto.
* gcc.target/i386/prefetchi-4.c: Ditto.

Co-authored-by: Hongtao Liu 
---
 gcc/common/config/i386/cpuinfo.h  |  2 +
 gcc/common/config/i386/i386-common.cc | 15 
 gcc/common/config/i386/i386-cpuinfo.h |  1 +
 gcc/common/config/i386/i386-isas.h|  1 +
 gcc/config.gcc|  2 +-
 gcc/config/i386/cpuid.h   |  1 +
 gcc/config/i386/i386-builtin-types.def|  4 +
 gcc/config/i386/i386-builtin.def  |  4 +
 gcc/config/i386/i386-c.cc |  2 +
 gcc/config/i386/i386-expand.cc| 77 +++
 gcc/config/i386/i386-isa.def  |  1 +
 gcc/config/i386/i386-options.cc   |  4 +-
 gcc/config/i386/i386.md   | 23 ++
 gcc/config/i386/i386.opt  |  4 +
 gcc/config/i386/predicates.md | 15 
 gcc/config/i386/prfchiintrin.h| 49 
 gcc/config/i386/x86gprintrin.h|  2 +
 gcc/config/i386/xmmintrin.h   |  7 +-
 gcc/doc/extend.texi   |  5 ++
 gcc/doc/invoke.texi   | 10 ++-
 gcc/doc/sourcebuild.texi  |  3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |  2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |  2 +-
 gcc/testsuite/gcc.target/i386/avx-1.c |  4 +-
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
 gcc/testsuite/gcc.target/i386/prefetchi-1.c   | 40 ++
 gcc/testsuite/gcc.target/i386/prefetchi-2.c   | 26 +++
 gcc/testsuite/gcc.target/i386/prefetchi-3.c   | 20 +
 gcc/testsuite/gcc.target/i386/prefetchi-4.c   | 19 +
 gcc/testsuite/gcc.target/i386/sse-13.c|  4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|  4 +-
 .../gcc.target/i386/x86gprintrin-1.c  |  2 +-
 .../gcc.target/i386/x86gprintrin-2.c  |  2 +-
 .../gcc.target/i386/x86gprintrin-3.c  |  2 +-
 .../gcc.target/i386/x86gprintrin-4.c  |  2 +-
 .../gcc.target/i386/x86gprintrin-5.c  |  2 +-
 36 files changed, 345 insertions(+), 20 

[PATCH] Support Intel CMPccXADD

2022-11-03 Thread Haochen Jiang via Gcc-patches
Hi all,

I just revised the patch according to the review. The changes compared to
the previous version are mentioned below.

Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect cmpccxadd.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_CMPCCXADD_SET,
OPTION_MASK_ISA2_CMPCCXADD_UNSET): New.
(ix86_handle_option): Handle -mcmpccxadd.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_CMPCCXADD.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
cmpccxadd.
* config.gcc: Add cmpccxaddintrin.h.
* config/i386/cpuid.h (bit_CMPCCXADD): New.
* config/i386/i386-builtin-types.def:
Add DEF_FUNCTION_TYPE(INT, PINT, INT, INT, INT)
and DEF_FUNCTION_TYPE(LONGLONG, PLONGLONG, LONGLONG, LONGLONG, INT).
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__CMPCCXADD__.
* config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
Add new parameter to indicate constant position.
Handle INT_FTYPE_PINT_INT_INT_INT
and LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT.
* config/i386/i386-isa.def (CMPCCXADD): Add DEF_PTA(CMPCCXADD).
* config/i386/i386-options.cc (isa2_opts): Add -mcmpccxadd.
(ix86_valid_target_attribute_inner_p): Handle cmpccxadd.
* config/i386/i386.opt: Add option -mcmpccxadd.
* config/i386/sync.md (cmpccxadd_): New define insn.
* config/i386/x86gprintrin.h: Include cmpccxaddintrin.h.
* doc/extend.texi: Document cmpccxadd.
* doc/invoke.texi: Document -mcmpccxadd.
* doc/sourcebuild.texi: Document target cmpccxadd.
* config/i386/cmpccxaddintrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mcmpccxadd.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/avx-1.c: Ditto.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-13.c: Add -mcmpccxadd.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/x86gprintrin-1.c: Ditto.
* gcc.target/i386/x86gprintrin-2.c: Ditto.
* gcc.target/i386/x86gprintrin-3.c: Ditto.
* gcc.target/i386/x86gprintrin-4.c: Ditto.
* gcc.target/i386/x86gprintrin-5.c: Ditto.
* gcc.target/i386/cmpccxadd-1.c: New test.
* gcc.target/i386/cmpccxadd-2.c: Ditto.
---
 gcc/common/config/i386/cpuinfo.h  |   2 +
 gcc/common/config/i386/i386-common.cc |  15 ++
 gcc/common/config/i386/i386-cpuinfo.h |   1 +
 gcc/common/config/i386/i386-isas.h|   1 +
 gcc/config.gcc|   3 +-
 gcc/config/i386/cmpccxaddintrin.h |  89 +++
 gcc/config/i386/cpuid.h   |   1 +
 gcc/config/i386/i386-builtin-types.def|   4 +
 gcc/config/i386/i386-builtin.def  |   4 +
 gcc/config/i386/i386-c.cc |   2 +
 gcc/config/i386/i386-expand.cc|  22 ++-
 gcc/config/i386/i386-isa.def  |   1 +
 gcc/config/i386/i386-options.cc   |   4 +-
 gcc/config/i386/i386.opt  |   5 +
 gcc/config/i386/sync.md   |  29 
 gcc/config/i386/x86gprintrin.h|   2 +
 gcc/doc/extend.texi   |   5 +
 gcc/doc/invoke.texi   |  10 +-
 gcc/doc/sourcebuild.texi  |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |   2 +-
 gcc/testsuite/gcc.target/i386/avx-1.c |   4 +
 gcc/testsuite/gcc.target/i386/cmpccxadd-1.c   |  61 
 gcc/testsuite/gcc.target/i386/cmpccxadd-2.c   | 138 ++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 gcc/testsuite/gcc.target/i386/sse-13.c|   6 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|   6 +-
 .../gcc.target/i386/x86gprintrin-1.c  |   2 +-
 .../gcc.target/i386/x86gprintrin-2.c  |   6 +-
 .../gcc.target/i386/x86gprintrin-3.c  |   2 +-
 .../gcc.target/i386/x86gprintrin-4.c  |   2 +-
 .../gcc.target/i386/x86gprintrin-5.c  |   6 +-
 gcc/testsuite/lib/target-supports.exp |  10 ++
 33 files changed, 437 insertions(+), 15 deletions(-)
 create mode 100644 gcc/config/i386/cmpccxaddintrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-2.c

diff --git a/gcc/config/i386/cmpccxaddintrin.h 
b/gcc/config/i386/cmpccxaddintrin.h
--- /dev/null
+++ b/gcc/config/i386/cmpccxaddintrin.h
+#define __cmpccxadd_epi64(A,B,C,D) \
+  __builtin_ia32_cmpccxadd64 ((long long *) (A), (long long) (B), \
+ (long long) (C), (_CMPCCX_ENUM) (D))
+#endif


[PATCH] Support Intel CMPccXADD

2022-10-24 Thread Haochen Jiang via Gcc-patches
Hi all,

I just refined the CMPccXADD patch so that the order of the enum in the
intrin file matches the order used by the opcode.

Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect cmpccxadd.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_CMPCCXADD_SET,
OPTION_MASK_ISA2_CMPCCXADD_UNSET): New.
(ix86_handle_option): Handle -mcmpccxadd, unset cmpccxadd when avx2
is disabled.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_CMPCCXADD.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
cmpccxadd.
* config.gcc: Add cmpccxaddintrin.h.
* config/i386/cpuid.h (bit_CMPCCXADD): New.
* config/i386/i386-builtin-types.def:
Add DEF_FUNCTION_TYPE(INT, PINT, INT, INT, INT)
and DEF_FUNCTION_TYPE(LONGLONG, PLONGLONG, LONGLONG, LONGLONG, INT).
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__CMPCCXADD__.
* config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
Add new parameter to indicate constant position.
Handle INT_FTYPE_PINT_INT_INT_INT
and LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT.
* config/i386/i386-isa.def (CMPCCXADD): Add DEF_PTA(CMPCCXADD).
* config/i386/i386-options.cc (isa2_opts): Add -mcmpccxadd.
(ix86_valid_target_attribute_inner_p): Handle cmpccxadd.
* config/i386/i386.opt: Add option -mcmpccxadd.
* config/i386/sync.md (cmpccxadd_): New define insn.
* config/i386/x86gprintrin.h: Include cmpccxaddintrin.h.
* doc/extend.texi: Document cmpccxadd.
* doc/invoke.texi: Document -mcmpccxadd.
* doc/sourcebuild.texi: Document target cmpccxadd.
* config/i386/cmpccxaddintrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mcmpccxadd.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/avx-1.c: Add builtin define for enum.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-13.c: Add builtin define for enum.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/x86gprintrin-1.c: Add -mcmpccxadd for 64 bit target.
* gcc.target/i386/x86gprintrin-2.c: Add -mcmpccxadd for 64 bit target.
Add builtin define for enum.
* gcc.target/i386/x86gprintrin-3.c: Add -mcmpccxadd for 64 bit target.
* gcc.target/i386/x86gprintrin-4.c: Add -mcmpccxadd for 64 bit target.
* gcc.target/i386/x86gprintrin-5.c: Add -mcmpccxadd for 64 bit target.
Add builtin define for enum.
* gcc.target/i386/cmpccxadd-1.c: New test.
* gcc.target/i386/cmpccxadd-2.c: New test.
---
 gcc/common/config/i386/cpuinfo.h  |   2 +
 gcc/common/config/i386/i386-common.cc |  15 ++
 gcc/common/config/i386/i386-cpuinfo.h |   1 +
 gcc/common/config/i386/i386-isas.h|   1 +
 gcc/config.gcc|   3 +-
 gcc/config/i386/cmpccxaddintrin.h |  89 +++
 gcc/config/i386/cpuid.h   |   1 +
 gcc/config/i386/i386-builtin-types.def|   4 +
 gcc/config/i386/i386-builtin.def  |   4 +
 gcc/config/i386/i386-c.cc |   2 +
 gcc/config/i386/i386-expand.cc|  22 ++-
 gcc/config/i386/i386-isa.def  |   1 +
 gcc/config/i386/i386-options.cc   |   4 +-
 gcc/config/i386/i386.opt  |   5 +
 gcc/config/i386/sync.md   |  42 ++
 gcc/config/i386/x86gprintrin.h|   2 +
 gcc/doc/extend.texi   |   5 +
 gcc/doc/invoke.texi   |  10 +-
 gcc/doc/sourcebuild.texi  |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |   2 +-
 gcc/testsuite/gcc.target/i386/avx-1.c |   4 +
 gcc/testsuite/gcc.target/i386/cmpccxadd-1.c   |  61 
 gcc/testsuite/gcc.target/i386/cmpccxadd-2.c   | 138 ++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 gcc/testsuite/gcc.target/i386/sse-13.c|   6 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|   6 +-
 .../gcc.target/i386/x86gprintrin-1.c  |   2 +-
 .../gcc.target/i386/x86gprintrin-2.c  |   6 +-
 .../gcc.target/i386/x86gprintrin-3.c  |   2 +-
 .../gcc.target/i386/x86gprintrin-4.c  |   2 +-
 .../gcc.target/i386/x86gprintrin-5.c  |   6 +-
 gcc/testsuite/lib/target-supports.exp |  10 ++
 33 files changed, 450 insertions(+), 15 deletions(-)
 create mode 100644 gcc/config/i386/cmpccxaddintrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-2.c

diff --git 

[PATCH v2] Add a parameter for the builtin function of prefetch to align with LLVM

2022-10-18 Thread Haochen Jiang via Gcc-patches
Hi Richard,

This is my new patch, which changes the warning message on aarch64/arm.

Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* builtins.cc (expand_builtin_prefetch): Handle the fourth parameter in
expand function.
* config/aarch64/aarch64-sve.md: Add default parameter value.
* config/aarch64/aarch64.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/alpha/alpha.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/arc/arc.md: Add default parameter value.
* config/arm/arm.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/frv/frv.md: Ditto.
* config/i386/i386.md: Ditto.
* config/ia64/ia64.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/mips/mips.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/pa/pa.md: Ditto.
* config/rs6000/rs6000.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/s390/s390.cc (s390_expand_cpymem): Generate fourth parameter 
for
gen_prefetch call.
(s390_expand_setmem): Ditto.
(s390_expand_cmpmem): Ditto.
* config/s390/s390.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/sh/sh.md: Ditto.
* config/sparc/sparc.md: Ditto.
* doc/rtl.texi: Document cache variable for prefetch.
* rtl.def (PREFETCH): Change prefetch DEF_RTL_EXPR to add fourth 
parameter.
* rtlanal.cc (setup_reg_subrtx_bounds): Change gcc_checking_assert for
fourth parameter.
* target-insns.def (prefetch): Add fourth rtx for prefetch.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/builtin-prefetch-1.c: Add fourth parameter for
testcases.
* gcc.c-torture/execute/builtin-prefetch-2.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-3.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-4.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-5.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-6.c: Ditto.
* gcc.dg/builtin-prefetch-1.c: Ditto.
* gcc.misc-tests/i386-pf-3dnow-1.c: Ditto.
* gcc.misc-tests/i386-pf-athlon-1.c: Ditto.
* gcc.misc-tests/i386-pf-none-1.c: Ditto.
* gcc.misc-tests/i386-pf-sse-1.c: Ditto.
* gcc.target/i386/avx-1.c: Change prefetch macro define to variable 
args.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/aarch64/prefetchi-1.c: New test.
* gcc.target/alpha/prefetchi-1.c: Ditto.
* gcc.target/arc/prefetchi-1.c: Ditto.
* gcc.target/arm/prefetchi-1.c: Ditto.
* gcc.target/hppa/prefetchi-1.c: Ditto.
* gcc.target/i386/prefetchi-1.c: Ditto.
* gcc.target/ia64/prefetchi-1.c: Ditto.
* gcc.target/mips/prefetchi-1.c: Ditto.
* gcc.target/powerpc/prefetchi-1.c: Ditto.
* gcc.target/s390/prefetchi-1.c: Ditto.
* gcc.target/sh/prefetchi-1.c: Ditto.
* gcc.target/sparc/prefetchi-1.c: Ditto.
---
 gcc/builtins.cc   |  34 --
 gcc/config/aarch64/aarch64-sve.md |  15 ++-
 gcc/config/aarch64/aarch64.md |  19 +++-
 gcc/config/alpha/alpha.md |  19 +++-
 gcc/config/arc/arc.md |  20 +++-
 gcc/config/arm/arm.md |  19 +++-
 gcc/config/frv/frv.md |   6 +-
 gcc/config/i386/i386.md   |  17 ++-
 gcc/config/ia64/ia64.md   |  19 +++-
 gcc/config/mips/mips.md   |  22 +++-
 gcc/config/pa/pa.md   |  12 +-
 gcc/config/rs6000/rs6000.md   |  19 +++-
 gcc/config/s390/s390.cc   |  10 +-
 gcc/config/s390/s390.md   |  19 +++-
 gcc/config/sh/sh.md   |  15 ++-
 gcc/config/sparc/sparc.md |  15 ++-
 gcc/doc/rtl.texi  |   6 +-
 gcc/rtl.def   |   5 +-
 gcc/rtlanal.cc|   2 +-
 gcc/target-insns.def  |   2 +-
 .../execute/builtin-prefetch-1.c  |  45 
 .../execute/builtin-prefetch-2.c  | 106 +-
 .../execute/builtin-prefetch-3.c  |  92 +++
 .../execute/builtin-prefetch-4.c  |  44 
 .../execute/builtin-prefetch-5.c  |  12 +-
 .../execute/builtin-prefetch-6.c  |   4 +-
 gcc/testsuite/gcc.dg/builtin-prefetch-1.c |   5 +-
 .../gcc.misc-tests/i386-pf-3dnow-1.c  |  16 +--
 .../gcc.misc-tests/i386-pf-athlon-1.c |  16 +--
 

[PATCH] i386: Auto vectorize sdot_prod, udot_prod with VNNIINT8 instruction.

2022-10-18 Thread Haochen Jiang via Gcc-patches
Hi all,

We would like to add one more patch to enhance the codegen with avxvnniint8.
It also renames two awkwardly named mode_attrs to align them with the others.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* config/i386/sse.md (ssedvecmode): Rename from VI1SI.
(ssedvecmodelower): Rename from vi1si.
(sdot_prod): New define_expand.
(udot_prod): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/vnniint8-auto-vectorize-1.c: New test.
* gcc.target/i386/vnniint8-auto-vectorize-2.c: Ditto.
---
 gcc/config/i386/sse.md| 61 ---
 .../i386/vnniint8-auto-vectorize-1.c  | 28 +++
 .../i386/vnniint8-auto-vectorize-2.c  | 75 +++
 3 files changed, 153 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-2.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 29cf6fa090b..fc17b5193dc 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1043,6 +1043,13 @@
(V16HI "v16hi") (V8HI "v8hi")
(V32QI "v32qi") (V16QI "v16qi")])
 
+;; Mapping of vector modes to an V*SImode of the same size
+(define_mode_attr ssedvecmode
+ [(V64QI "V16SI") (V32QI "V8SI") (V16QI "V4SI")])
+
+(define_mode_attr ssedvecmodelower
+ [(V64QI "v16si") (V32QI "v8si") (V16QI "v4si")])
+
 ;; Mapping of vector modes to a vector mode of double size
 (define_mode_attr ssedoublevecmode
   [(V64QI "V128QI") (V32HI "V64HI") (V16SI "V32SI") (V8DI "V16DI")
@@ -28523,29 +28530,23 @@
[(set_attr ("prefix") ("evex"))
(set_attr "mode" "")])
 
-(define_mode_attr VI1SI
- [(V64QI "V16SI") (V32QI "V8SI") (V16QI "V4SI")])
-
-(define_mode_attr vi1si
- [(V64QI "v16si") (V32QI "v8si") (V16QI "v4si")])
-
 (define_expand "usdot_prod"
-  [(match_operand: 0 "register_operand")
+  [(match_operand: 0 "register_operand")
(match_operand:VI1_AVX512VNNI 1 "register_operand")
(match_operand:VI1_AVX512VNNI 2 "register_operand")
-   (match_operand: 3 "register_operand")]
+   (match_operand: 3 "register_operand")]
   "( == 64
 ||((TARGET_AVX512VNNI && TARGET_AVX512VL)
|| TARGET_AVXVNNI))"
 {
-  operands[1] = lowpart_subreg (mode,
+  operands[1] = lowpart_subreg (mode,
force_reg (mode, operands[1]),
mode);
-  operands[2] = lowpart_subreg (mode,
+  operands[2] = lowpart_subreg (mode,
force_reg (mode, operands[2]),
mode);
   emit_insn (gen_rtx_SET (operands[0], operands[3]));
-  emit_insn (gen_vpdpbusd_ (operands[0], operands[3],
+  emit_insn (gen_vpdpbusd_ (operands[0], operands[3],
  operands[1], operands[2]));
   DONE;
 })
@@ -29358,6 +29359,44 @@
(UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
(UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
 
+(define_expand "sdot_prod"
+  [(match_operand: 0 "register_operand")
+   (match_operand:VI1 1 "register_operand")
+   (match_operand:VI1 2 "register_operand")
+   (match_operand: 3 "register_operand")]
+  "TARGET_AVXVNNIINT8"
+{
+  operands[1] = lowpart_subreg (mode,
+force_reg (mode, operands[1]),
+mode);
+  operands[2] = lowpart_subreg (mode,
+force_reg (mode, operands[2]),
+mode);
+  emit_insn (gen_rtx_SET (operands[0], operands[3]));
+  emit_insn (gen_vpdpbssd_ (operands[0], operands[3],
+  operands[1], operands[2]));
+  DONE;
+})
+
+(define_expand "udot_prod"
+  [(match_operand: 0 "register_operand")
+   (match_operand:VI1 1 "register_operand")
+   (match_operand:VI1 2 "register_operand")
+   (match_operand: 3 "register_operand")]
+  "TARGET_AVXVNNIINT8"
+{
+  operands[1] = lowpart_subreg (mode,
+force_reg (mode, operands[1]),
+mode);
+  operands[2] = lowpart_subreg (mode,
+force_reg (mode, operands[2]),
+mode);
+  emit_insn (gen_rtx_SET (operands[0], operands[3]));
+  emit_insn (gen_vpdpbuud_ (operands[0], operands[3],
+  operands[1], operands[2]));
+  DONE;
+})
+
 (define_insn "vpdp_"
   [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
(unspec:VI4_AVX
diff --git a/gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-1.c 
b/gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-1.c
new file mode 100644
index 000..9cadab6a845
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */ 
+/* { dg-options "-mavxvnniint8 -O2" } */
+/* { dg-final { scan-assembler "vpdpbssd\t" } } */
+/* { 

[PATCH v2] Support Intel AVX-VNNI-INT8

2022-10-18 Thread Haochen Jiang via Gcc-patches
From: Kong Lingling 

Hi all,

This is our v2 patch for AVX-VNNI-INT8. The main change in this patch is to
rename the previous UNSPEC_VPMADDxxx names to the new vnni style.

Ok for trunk?

BRs,
Haochen

gcc/ChangeLog

* common/config/i386/cpuinfo.h (get_available_features): Detect
avxvnniint8.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AVXVNNIINT8_SET): New.
(OPTION_MASK_ISA2_AVXVNNIINT8_UNSET): Ditto.
(ix86_handle_option): Handle -mavxvnniint8.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_AVXVNNIINT8.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
avxvnniint8.
* config.gcc: Add avxvnniint8intrin.h.
* config/i386/avxvnniint8intrin.h: New file.
* config/i386/cpuid.h (bit_AVXVNNIINT8): New.
* config/i386/i386-builtin.def: Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__AVXVNNIINT8__.
* config/i386/i386-options.cc (isa2_opts): Add -mavxvnniint8.
(ix86_valid_target_attribute_inner_p): Handle avxvnniint8.
* config/i386/i386-isa.def: Add DEF_PTA(AVXVNNIINT8).
* config/i386/i386.opt: Add option -mavxvnniint8.
* config/i386/immintrin.h: Include avxvnniint8intrin.h.
* config/i386/sse.md (UNSPEC_VPMADDUBSWACCD
UNSPEC_VPMADDUBSWACCSSD,UNSPEC_VPMADDWDACCD,
UNSPEC_VPMADDWDACCSSD): Rename according to new style.
(vpdp_): New define_insn.
* doc/extend.texi: Document avxvnniint8.
* doc/invoke.texi: Document -mavxvnniint8.
* doc/sourcebuild.texi: Document target avxvnniint8.

gcc/testsuite/ChangeLog

* g++.dg/other/i386-2.C: Add -mavxvnniint8.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/avx-check.h: Add avxvnniint8 check.
* gcc.target/i386/sse-12.c: Add -mavxvnniint8.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* lib/target-supports.exp
(check_effective_target_avxvnniint8): New.
* gcc.target/i386/avxvnniint8-1.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbssd-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbssds-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbsud-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbsuds-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbuud-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbuuds-2.c: Ditto.

Co-authored-by: Hongyu Wang 
Co-authored-by: Haochen Jiang 
---
 gcc/common/config/i386/cpuinfo.h  |   2 +
 gcc/common/config/i386/i386-common.cc |  22 ++-
 gcc/common/config/i386/i386-cpuinfo.h |   1 +
 gcc/common/config/i386/i386-isas.h|   2 +
 gcc/config.gcc|   2 +-
 gcc/config/i386/avxvnniint8intrin.h   | 138 ++
 gcc/config/i386/cpuid.h   |   1 +
 gcc/config/i386/i386-builtin.def  |  14 ++
 gcc/config/i386/i386-c.cc |   2 +
 gcc/config/i386/i386-isa.def  |   1 +
 gcc/config/i386/i386-options.cc   |   4 +-
 gcc/config/i386/i386.opt  |   5 +
 gcc/config/i386/immintrin.h   |   2 +
 gcc/config/i386/sse.md|  71 ++---
 gcc/doc/extend.texi   |   5 +
 gcc/doc/invoke.texi   |   9 +-
 gcc/doc/sourcebuild.texi  |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |   2 +-
 gcc/testsuite/gcc.target/i386/avx-check.h |   3 +
 gcc/testsuite/gcc.target/i386/avxvnniint8-1.c |  43 ++
 .../gcc.target/i386/avxvnniint8-vpdpbssd-2.c  |  72 +
 .../gcc.target/i386/avxvnniint8-vpdpbssds-2.c |  72 +
 .../gcc.target/i386/avxvnniint8-vpdpbsud-2.c  |  72 +
 .../gcc.target/i386/avxvnniint8-vpdpbsuds-2.c |  72 +
 .../gcc.target/i386/avxvnniint8-vpdpbuud-2.c  |  72 +
 .../gcc.target/i386/avxvnniint8-vpdpbuuds-2.c |  72 +
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 gcc/testsuite/gcc.target/i386/sse-12.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c|   4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|   2 +-
 gcc/testsuite/lib/target-supports.exp |  12 ++
 34 files changed, 758 insertions(+), 34 deletions(-)
 create mode 100644 gcc/config/i386/avxvnniint8intrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
 create mode 100644 

[PATCH 1/2] Add a parameter for the builtin function of prefetch to align with LLVM

2022-10-14 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* builtins.cc (expand_builtin_prefetch): Handle the fourth parameter in
expand function.
* config/aarch64/aarch64-sve.md: Add default parameter value.
* config/aarch64/aarch64.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/alpha/alpha.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/arc/arc.md: Add default parameter value.
* config/arm/arm.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/frv/frv.md: Ditto.
* config/i386/i386.md: Ditto.
* config/ia64/ia64.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/mips/mips.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/pa/pa.md: Ditto.
* config/rs6000/rs6000.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/s390/s390.cc (s390_expand_cpymem): Generate fourth parameter 
for
gen_prefetch call.
(s390_expand_setmem): Ditto.
(s390_expand_cmpmem): Ditto.
* config/s390/s390.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/sh/sh.md: Ditto.
* config/sparc/sparc.md: Ditto.
* doc/rtl.texi: Document cache variable for prefetch.
* rtl.def (PREFETCH): Change prefetch DEF_RTL_EXPR to add fourth 
parameter.
* rtlanal.cc (setup_reg_subrtx_bounds): Change gcc_checking_assert for
fourth parameter.
* target-insns.def (prefetch): Add fourth rtx for prefetch.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/builtin-prefetch-1.c: Add fourth parameter for
testcases.
* gcc.c-torture/execute/builtin-prefetch-2.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-3.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-4.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-5.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-6.c: Ditto.
* gcc.dg/builtin-prefetch-1.c: Ditto.
* gcc.misc-tests/i386-pf-3dnow-1.c: Ditto.
* gcc.misc-tests/i386-pf-athlon-1.c: Ditto.
* gcc.misc-tests/i386-pf-none-1.c: Ditto.
* gcc.misc-tests/i386-pf-sse-1.c: Ditto.
* gcc.target/i386/avx-1.c: Change prefetch macro define to variable 
args.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/aarch64/prefetchi-1.c: New test.
* gcc.target/alpha/prefetchi-1.c: Ditto.
* gcc.target/arc/prefetchi-1.c: Ditto.
* gcc.target/arm/prefetchi-1.c: Ditto.
* gcc.target/hppa/prefetchi-1.c: Ditto.
* gcc.target/i386/prefetchi-1.c: Ditto.
* gcc.target/ia64/prefetchi-1.c: Ditto.
* gcc.target/mips/prefetchi-1.c: Ditto.
* gcc.target/powerpc/prefetchi-1.c: Ditto.
* gcc.target/s390/prefetchi-1.c: Ditto.
* gcc.target/sh/prefetchi-1.c: Ditto.
* gcc.target/sparc/prefetchi-1.c: Ditto.
---
 gcc/builtins.cc   |  34 --
 gcc/config/aarch64/aarch64-sve.md |  15 ++-
 gcc/config/aarch64/aarch64.md |  19 +++-
 gcc/config/alpha/alpha.md |  19 +++-
 gcc/config/arc/arc.md |  20 +++-
 gcc/config/arm/arm.md |  19 +++-
 gcc/config/frv/frv.md |   6 +-
 gcc/config/i386/i386.md   |  17 ++-
 gcc/config/ia64/ia64.md   |  19 +++-
 gcc/config/mips/mips.md   |  22 +++-
 gcc/config/pa/pa.md   |  12 +-
 gcc/config/rs6000/rs6000.md   |  19 +++-
 gcc/config/s390/s390.cc   |  10 +-
 gcc/config/s390/s390.md   |  19 +++-
 gcc/config/sh/sh.md   |  15 ++-
 gcc/config/sparc/sparc.md |  15 ++-
 gcc/doc/rtl.texi  |   6 +-
 gcc/rtl.def   |   5 +-
 gcc/rtlanal.cc|   2 +-
 gcc/target-insns.def  |   2 +-
 .../execute/builtin-prefetch-1.c  |  45 
 .../execute/builtin-prefetch-2.c  | 106 +-
 .../execute/builtin-prefetch-3.c  |  92 +++
 .../execute/builtin-prefetch-4.c  |  44 
 .../execute/builtin-prefetch-5.c  |  12 +-
 .../execute/builtin-prefetch-6.c  |   4 +-
 gcc/testsuite/gcc.dg/builtin-prefetch-1.c |   5 +-
 .../gcc.misc-tests/i386-pf-3dnow-1.c  |  16 +--
 .../gcc.misc-tests/i386-pf-athlon-1.c |  16 +--
 gcc/testsuite/gcc.misc-tests/i386-pf-none-1.c |  16 +--
 gcc/testsuite/gcc.misc-tests/i386-pf-sse-1.c  |  16 +--
 .../gcc.target/aarch64/prefetchi-1.c 

[PATCH 2/2] Support Intel prefetchit0/t1

2022-10-14 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect PREFETCHI.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_PREFETCHI_SET,
OPTION_MASK_ISA2_PREFETCHI_UNSET): New.
(ix86_handle_option): Handle -mprefetchi.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_PREFETCHI.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
prefetchi.
* config.gcc: Add prfchiintrin.h.
* config/i386/cpuid.h (bit_PREFETCHI): New.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__PREFETCHI__.
* config/i386/i386-isa.def (PREFETCHI): Add DEF_PTA(PREFETCHI).
* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
Handle prefetchi.
* config/i386/i386.md (prefetch): Add handler for prefetchi.
(*prefetch_i): New define_insn.
* config/i386/i386.opt: Add option -mprefetchi.
* config/i386/immintrin.h: Include prfchiintrin.h.
* config/i386/predicates.md (local_func_symbolic_operand):
New predicates.
* config/i386/xmmintrin.h (enum _mm_hint): New enum for prefetchi.
(_mm_prefetch): Handle the highest bit of enum.
* doc/extend.texi: Document prefetchi.
* doc/invoke.texi: Document -mprefetchi.
* doc/sourcebuild.texi: Document target prefetchi.
* config/i386/prfchiintrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mprefetchi.
* g++.dg/other/i386-3.C: Ditto.
* gcc.misc-tests/i386-pf-3dnow-1.c: Add scan-assembler-not for
prefetchit0/t1.
* gcc.misc-tests/i386-pf-athlon-1.c: Ditto.
* gcc.misc-tests/i386-pf-sse-1.c: Ditto.
* gcc.target/i386/avx-1.c: Add -mprefetchi.
* gcc.target/i386/avx-2.c: Ditto.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/prefetchi-1.c: Rewrite testcase.
* gcc.target/i386/prefetchi-2.c: New test.
* gcc.target/i386/prefetchi-3.c: Ditto.
* gcc.target/i386/sse-12.c: Add -mprefetchi.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add prefetchi.
* gcc.target/i386/sse-23.c: Ditto.

Co-authored-by: Hongtao Liu 
---
 gcc/common/config/i386/cpuinfo.h  |  2 +
 gcc/common/config/i386/i386-common.cc | 15 
 gcc/common/config/i386/i386-cpuinfo.h |  1 +
 gcc/common/config/i386/i386-isas.h|  1 +
 gcc/config.gcc|  2 +-
 gcc/config/i386/cpuid.h   |  1 +
 gcc/config/i386/i386-c.cc |  2 +
 gcc/config/i386/i386-isa.def  |  1 +
 gcc/config/i386/i386-options.cc   |  4 +-
 gcc/config/i386/i386.md   | 90 +--
 gcc/config/i386/i386.opt  |  4 +
 gcc/config/i386/immintrin.h   |  2 +
 gcc/config/i386/predicates.md | 15 
 gcc/config/i386/prfchiintrin.h| 39 
 gcc/config/i386/xmmintrin.h   |  6 +-
 gcc/doc/extend.texi   |  5 ++
 gcc/doc/invoke.texi   | 10 ++-
 gcc/doc/sourcebuild.texi  |  3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |  2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |  2 +-
 .../gcc.misc-tests/i386-pf-3dnow-1.c  |  2 +
 .../gcc.misc-tests/i386-pf-athlon-1.c |  2 +
 gcc/testsuite/gcc.misc-tests/i386-pf-sse-1.c  |  2 +
 gcc/testsuite/gcc.target/i386/avx-1.c |  2 +-
 gcc/testsuite/gcc.target/i386/avx-2.c |  2 +-
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
 gcc/testsuite/gcc.target/i386/prefetchi-1.c   | 36 ++--
 gcc/testsuite/gcc.target/i386/prefetchi-2.c   | 26 ++
 gcc/testsuite/gcc.target/i386/prefetchi-3.c   | 15 
 gcc/testsuite/gcc.target/i386/sse-12.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c|  4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|  2 +-
 34 files changed, 259 insertions(+), 49 deletions(-)
 create mode 100644 gcc/config/i386/prfchiintrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-3.c

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 118f3a42abd..551e0483330 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -797,6 +797,8 @@ get_available_features (struct __processor_model *cpu_model,
set_feature (FEATURE_HRESET);
   if (eax & bit_CMPCCXADD)
set_feature(FEATURE_CMPCCXADD);
+  if (edx & bit_PREFETCHI)
+   set_feature (FEATURE_PREFETCHI);
   if 

[PATCH 0/2] Add a Fourth parameter for prefetch and Support Intel PREFETCHI

2022-10-14 Thread Haochen Jiang via Gcc-patches
Hi all,

Sorry for the disturbance caused by the previous cover letter getting
stuck; this is the correct cover letter.

These two patches aim to add support for Intel PREFETCHI.

The information is based on the newly released
Intel Architecture Instruction Set Extensions and Future Features.

The document can be found at:
https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

The first patch adds a fourth parameter to prefetch in the middle end to
align with LLVM, which already has a fourth parameter indicating what is
being prefetched. It also adds a warning, in the machine description files
of backends that do not support instruction prefetch, to tell users who
attempt to use prefetchi that the backend will change it to a data prefetch.

The second patch is i386-specific and adds PREFETCHI support to i386.

Regtested on x86_64-pc-linux-gnu and cross-compiled for the other backends.
For the other backends, I ran the compile tests and found no regressions.
Since I do not have machines for the other backends, could you kindly help
me test on them? I expect there to be no regressions, since I only added
a warning to the md files and a corresponding testcase.

Ok for trunk?

BRs,
Haochen





[PATCH 1/3] Add a parameter for the builtin function of prefetch to align with LLVM

2022-10-14 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* builtins.cc (expand_builtin_prefetch): Handle the fourth parameter in
expand function.
* config/aarch64/aarch64-sve.md: Add default parameter value.
* config/aarch64/aarch64.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/alpha/alpha.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/arc/arc.md: Add default parameter value.
* config/arm/arm.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/frv/frv.md: Ditto.
* config/i386/i386.md: Ditto.
* config/ia64/ia64.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/mips/mips.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/pa/pa.md: Ditto.
* config/rs6000/rs6000.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/s390/s390.cc (s390_expand_cpymem): Generate fourth parameter
for gen_prefetch call.
(s390_expand_setmem): Ditto.
(s390_expand_cmpmem): Ditto.
* config/s390/s390.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/sh/sh.md: Ditto.
* config/sparc/sparc.md: Ditto.
* doc/rtl.texi: Document cache variable for prefetch.
* rtl.def (PREFETCH): Change prefetch DEF_RTL_EXPR to add fourth 
parameter.
* rtlanal.cc (setup_reg_subrtx_bounds): Change gcc_checking_assert for
fourth parameter.
* target-insns.def (prefetch): Add fourth rtx for prefetch.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/builtin-prefetch-1.c: Add fourth parameter for
testcases.
* gcc.c-torture/execute/builtin-prefetch-2.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-3.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-4.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-5.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-6.c: Ditto.
* gcc.dg/builtin-prefetch-1.c: Ditto.
* gcc.misc-tests/i386-pf-3dnow-1.c: Ditto.
* gcc.misc-tests/i386-pf-athlon-1.c: Ditto.
* gcc.misc-tests/i386-pf-none-1.c: Ditto.
* gcc.misc-tests/i386-pf-sse-1.c: Ditto.
* gcc.target/i386/avx-1.c: Change prefetch macro define to variable 
args.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/aarch64/prefetchi-1.c: New test.
* gcc.target/alpha/prefetchi-1.c: Ditto.
* gcc.target/arc/prefetchi-1.c: Ditto.
* gcc.target/arm/prefetchi-1.c: Ditto.
* gcc.target/hppa/prefetchi-1.c: Ditto.
* gcc.target/i386/prefetchi-1.c: Ditto.
* gcc.target/ia64/prefetchi-1.c: Ditto.
* gcc.target/mips/prefetchi-1.c: Ditto.
* gcc.target/powerpc/prefetchi-1.c: Ditto.
* gcc.target/s390/prefetchi-1.c: Ditto.
* gcc.target/sh/prefetchi-1.c: Ditto.
* gcc.target/sparc/prefetchi-1.c: Ditto.
---
 gcc/builtins.cc   |  34 --
 gcc/config/aarch64/aarch64-sve.md |  15 ++-
 gcc/config/aarch64/aarch64.md |  19 +++-
 gcc/config/alpha/alpha.md |  19 +++-
 gcc/config/arc/arc.md |  20 +++-
 gcc/config/arm/arm.md |  19 +++-
 gcc/config/frv/frv.md |   6 +-
 gcc/config/i386/i386.md   |  17 ++-
 gcc/config/ia64/ia64.md   |  19 +++-
 gcc/config/mips/mips.md   |  22 +++-
 gcc/config/pa/pa.md   |  12 +-
 gcc/config/rs6000/rs6000.md   |  19 +++-
 gcc/config/s390/s390.cc   |  10 +-
 gcc/config/s390/s390.md   |  19 +++-
 gcc/config/sh/sh.md   |  15 ++-
 gcc/config/sparc/sparc.md |  15 ++-
 gcc/doc/rtl.texi  |   6 +-
 gcc/rtl.def   |   5 +-
 gcc/rtlanal.cc|   2 +-
 gcc/target-insns.def  |   2 +-
 .../execute/builtin-prefetch-1.c  |  45 
 .../execute/builtin-prefetch-2.c  | 106 +-
 .../execute/builtin-prefetch-3.c  |  92 +++
 .../execute/builtin-prefetch-4.c  |  44 
 .../execute/builtin-prefetch-5.c  |  12 +-
 .../execute/builtin-prefetch-6.c  |   4 +-
 gcc/testsuite/gcc.dg/builtin-prefetch-1.c |   5 +-
 .../gcc.misc-tests/i386-pf-3dnow-1.c  |  16 +--
 .../gcc.misc-tests/i386-pf-athlon-1.c |  16 +--
 gcc/testsuite/gcc.misc-tests/i386-pf-none-1.c |  16 +--
 gcc/testsuite/gcc.misc-tests/i386-pf-sse-1.c  |  16 +--
 .../gcc.target/aarch64/prefetchi-1.c 

[PATCH 2/3] Support Intel prefetchit0/t1

2022-10-14 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect PREFETCHI.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_PREFETCHI_SET,
OPTION_MASK_ISA2_PREFETCHI_UNSET): New.
(ix86_handle_option): Handle -mprefetchi.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_PREFETCHI.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
prefetchi.
* config.gcc: Add prfchiintrin.h.
* config/i386/cpuid.h (bit_PREFETCHI): New.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__PREFETCHI__.
* config/i386/i386-isa.def (PREFETCHI): Add DEF_PTA(PREFETCHI).
* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
Handle prefetchi.
* config/i386/i386.md (prefetch): Add handler for prefetchi.
(*prefetch_i): New define_insn.
* config/i386/i386.opt: Add option -mprefetchi.
* config/i386/immintrin.h: Include prfchiintrin.h.
* config/i386/predicates.md (local_func_symbolic_operand):
New predicates.
* config/i386/xmmintrin.h (enum _mm_hint): New enum for prefetchi.
(_mm_prefetch): Handle the highest bit of enum.
* doc/extend.texi: Document prefetchi.
* doc/invoke.texi: Document -mprefetchi.
* doc/sourcebuild.texi: Document target prefetchi.
* config/i386/prfchiintrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mprefetchi.
* g++.dg/other/i386-3.C: Ditto.
* gcc.misc-tests/i386-pf-3dnow-1.c: Add scan-assembler-not for
prefetchit0/t1.
* gcc.misc-tests/i386-pf-athlon-1.c: Ditto.
* gcc.misc-tests/i386-pf-sse-1.c: Ditto.
* gcc.target/i386/avx-1.c: Add -mprefetchi.
* gcc.target/i386/avx-2.c: Ditto.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/prefetchi-1.c: Rewrite testcase.
* gcc.target/i386/prefetchi-2.c: New test.
* gcc.target/i386/prefetchi-3.c: Ditto.
* gcc.target/i386/sse-12.c: Add -mprefetchi.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add prefetchi.
* gcc.target/i386/sse-23.c: Ditto.

Co-authored-by: Hongtao Liu 
---
 gcc/common/config/i386/cpuinfo.h  |  2 +
 gcc/common/config/i386/i386-common.cc | 15 
 gcc/common/config/i386/i386-cpuinfo.h |  1 +
 gcc/common/config/i386/i386-isas.h|  1 +
 gcc/config.gcc|  2 +-
 gcc/config/i386/cpuid.h   |  1 +
 gcc/config/i386/i386-c.cc |  2 +
 gcc/config/i386/i386-isa.def  |  1 +
 gcc/config/i386/i386-options.cc   |  4 +-
 gcc/config/i386/i386.md   | 90 +--
 gcc/config/i386/i386.opt  |  4 +
 gcc/config/i386/immintrin.h   |  2 +
 gcc/config/i386/predicates.md | 15 
 gcc/config/i386/prfchiintrin.h| 39 
 gcc/config/i386/xmmintrin.h   |  6 +-
 gcc/doc/extend.texi   |  5 ++
 gcc/doc/invoke.texi   | 10 ++-
 gcc/doc/sourcebuild.texi  |  3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |  2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |  2 +-
 .../gcc.misc-tests/i386-pf-3dnow-1.c  |  2 +
 .../gcc.misc-tests/i386-pf-athlon-1.c |  2 +
 gcc/testsuite/gcc.misc-tests/i386-pf-sse-1.c  |  2 +
 gcc/testsuite/gcc.target/i386/avx-1.c |  2 +-
 gcc/testsuite/gcc.target/i386/avx-2.c |  2 +-
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
 gcc/testsuite/gcc.target/i386/prefetchi-1.c   | 36 ++--
 gcc/testsuite/gcc.target/i386/prefetchi-2.c   | 26 ++
 gcc/testsuite/gcc.target/i386/prefetchi-3.c   | 15 
 gcc/testsuite/gcc.target/i386/sse-12.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c|  4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|  2 +-
 34 files changed, 259 insertions(+), 49 deletions(-)
 create mode 100644 gcc/config/i386/prfchiintrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-3.c

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 118f3a42abd..551e0483330 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -797,6 +797,8 @@ get_available_features (struct __processor_model *cpu_model,
set_feature (FEATURE_HRESET);
   if (eax & bit_CMPCCXADD)
set_feature(FEATURE_CMPCCXADD);
+  if (edx & bit_PREFETCHI)
+   set_feature (FEATURE_PREFETCHI);
   if 

[PATCH] Support Intel AMX-FP16 ISA

2022-10-14 Thread Haochen Jiang via Gcc-patches
From: Hongyu Wang 

Hi all,

This patch aims to add the Intel AMX-FP16 ISA according to the newly
released Intel Architecture Instruction Set Extensions and Future Features.

The document can be found at:
https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features): Detect
amx-fp16.
* common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AMX_FP16_SET,
OPTION_MASK_ISA2_AMX_FP16_UNSET): New macros.
(ix86_handle_option): Handle -mamx-fp16.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_AMX_FP16.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
amx-fp16.
* config.gcc: Add amxfp16intrin.h.
* config/i386/cpuid.h (bit_AMX_FP16): New.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__AMX_FP16__.
* config/i386/i386-options.cc (isa2_opts): Add -mamx-fp16.
(ix86_valid_target_attribute_inner_p): Add new ATTR.
(ix86_option_override_internal): Handle AMX-FP16.
* config/i386/i386-isa.def: Add DEF_PTA for AMX_FP16.
* config/i386/i386.opt: Add -mamx-fp16.
* config/i386/immintrin.h: Include amxfp16intrin.h.
* doc/extend.texi: Document -mamx-fp16.
* doc/invoke.texi: Document amx-fp16.
* doc/sourcebuild.texi: Document amx_fp16.
* config/i386/amxfp16intrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mamx-fp16.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/sse-12.c: Ditto.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* lib/target-supports.exp (check_effective_target_amx_fp16):
New proc.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/amx-helper.h: New file to support amx-fp16.
* gcc.target/i386/amxfp16-asmatt-1.c: New test.
* gcc.target/i386/amxfp16-asmintel-1.c: Ditto.
* gcc.target/i386/amxfp16-dpfp16ps-2.c: Ditto.

Co-authored-by: Haochen Jiang 
---
 gcc/common/config/i386/cpuinfo.h  |  5 ++
 gcc/common/config/i386/i386-common.cc | 15 +
 gcc/common/config/i386/i386-cpuinfo.h |  1 +
 gcc/common/config/i386/i386-isas.h|  1 +
 gcc/config.gcc|  2 +-
 gcc/config/i386/amxfp16intrin.h   | 46 ++
 gcc/config/i386/cpuid.h   |  1 +
 gcc/config/i386/i386-c.cc |  2 +
 gcc/config/i386/i386-isa.def  |  1 +
 gcc/config/i386/i386-options.cc   |  4 +-
 gcc/config/i386/i386.opt  |  4 ++
 gcc/config/i386/immintrin.h   |  2 +
 gcc/doc/extend.texi   |  5 ++
 gcc/doc/invoke.texi   | 11 ++--
 gcc/doc/sourcebuild.texi  |  3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |  2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |  2 +-
 gcc/testsuite/gcc.target/i386/amx-check.h |  3 +
 gcc/testsuite/gcc.target/i386/amx-helper.h| 61 +++
 .../gcc.target/i386/amxfp16-asmatt-1.c| 13 
 .../gcc.target/i386/amxfp16-asmintel-1.c  | 10 +++
 .../gcc.target/i386/amxfp16-dpfp16ps-2.c  | 57 +
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
 gcc/testsuite/gcc.target/i386/sse-12.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c|  4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|  2 +-
 gcc/testsuite/lib/target-supports.exp | 11 
 29 files changed, 262 insertions(+), 14 deletions(-)
 create mode 100644 gcc/config/i386/amxfp16intrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/amx-helper.h
 create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index cc499c46ed0..118f3a42abd 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -813,6 +813,11 @@ get_available_features (struct __processor_model 
*cpu_model,
  if (eax & bit_AVX512BF16)
set_feature (FEATURE_AVX512BF16);
}
+  if (amx_usable)
+   {
+ if (eax & bit_AMX_FP16)
+   set_feature (FEATURE_AMX_FP16);
+   }
 }
 
   /* Get Advanced Features at level 0xd (eax = 0xd, ecx = 1). */
diff --git 

[PATCH 4/6] Support Intel AVX-NE-CONVERT

2022-10-14 Thread Haochen Jiang via Gcc-patches
From: Kong Lingling 

gcc/ChangeLog:

* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AVXNECONVERT_SET,
OPTION_MASK_ISA2_AVXNECONVERT_UNSET): New.
(ix86_handle_option): Handle -mavxneconvert, unset
avxneconvert when avx2 is disabled.
* common/config/i386/i386-cpuinfo.h (enum processor_features): Add
FEATURE_AVXNECONVERT.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
avxneconvert.
* common/config/i386/cpuinfo.h (get_available_features):
Detect avxneconvert.
* config.gcc: Add avxneconvertintrin.h.
* config/i386/avxneconvertintrin.h: New.
* config/i386/cpuid.h (bit_AVXNECONVERT): New.
* config/i386/i386-builtin-types.def: Add
DEF_POINTER_TYPE (PCV8HF, V8HF, CONST),
DEF_POINTER_TYPE (PCV16HF, V16HF, CONST),
DEF_FUNCTION_TYPE (V4SF, PCSHORT),
DEF_FUNCTION_TYPE (V8SF, PCSHORT),
DEF_FUNCTION_TYPE (V4SF, PCV8BF),
DEF_FUNCTION_TYPE (V4SF, PCV8HF),
DEF_FUNCTION_TYPE (V8SF, PCV16HF),
DEF_FUNCTION_TYPE (V8SF, PCV16BF).
* config/i386/i386-builtin.def: Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__AVXNECONVERT__.
* config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
Handle V4SF_FTYPE_PCSHORT,V8SF_FTYPE_PCSHORT,V4SF_FTYPE_PCV8BF,
V4SF_FTYPE_PCV8HF,V8SF_FTYPE_PCV16BF,V8SF_FTYPE_PCV16HF.
* config/i386/i386-isa.def: Add DEF_PTA(AVXNECONVERT).
* config/i386/i386-options.cc (isa2_opts): Add -mavxneconvert.
(ix86_valid_target_attribute_inner_p): Handle avxneconvert.
* config/i386/i386.opt: Add option -mavxneconvert.
* config/i386/immintrin.h: Include avxneconvertintrin.h.
* config/i386/sse.md: (avx_vbcstne2ps_),
(avx_vcvtne2ps_),
(avx_vcvtne2ps_),
(avx_vcvtneps2bf16_): New define_insn
(avx512f_cvtneps2bf16_): Ditto.
(avx512f_cvtneps2bf16__mask): Ditto.
* doc/invoke.texi: Document -mavxneconvert.
* doc/extend.texi: Document avxneconvert.
* doc/sourcebuild.texi: Document target avxneconvert.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx-check.h: Add avxneconvert check.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-12.c: Add -mavxneconvert.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* g++.dg/other/i386-2.C: Ditto.
* g++.dg/other/i386-3.C: Ditto.
* lib/target-supports.exp (check_effective_target_avxneconvert): New.
* gcc.target/i386/avx-ne-convert-1.c: New test.
* gcc.target/i386/avx-ne-convert-vbcstnebf162ps-2.c: Ditto.
* gcc.target/i386/avx-ne-convert-vbcstnesh2ps-2.c: Ditto.
* gcc.target/i386/avx-ne-convert-vcvtneebf162ps-2.c: Ditto.
* gcc.target/i386/avx-ne-convert-vcvtneeph2ps-2.c: Ditto.
* gcc.target/i386/avx-ne-convert-vcvtneobf162ps-2.c: Ditto.
* gcc.target/i386/avx-ne-convert-vcvtneoph2ps-2.c: Ditto.
* gcc.target/i386/avx-ne-convert-vcvtneps2bf16-2.c: Ditto.
* gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: Rename...
* gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1a.c: ...to this.
* gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1b.c: New test.
---
 gcc/common/config/i386/cpuinfo.h  |   2 +
 gcc/common/config/i386/i386-common.cc |  21 ++-
 gcc/common/config/i386/i386-cpuinfo.h |   1 +
 gcc/common/config/i386/i386-isas.h|   2 +
 gcc/config.gcc|   2 +-
 gcc/config/i386/avxneconvertintrin.h  | 140 ++
 gcc/config/i386/cpuid.h   |   1 +
 gcc/config/i386/i386-builtin-types.def|  17 +++
 gcc/config/i386/i386-builtin.def  |  18 +++
 gcc/config/i386/i386-c.cc |   2 +
 gcc/config/i386/i386-expand.cc|   8 +
 gcc/config/i386/i386-isa.def  |   1 +
 gcc/config/i386/i386-options.cc   |   4 +-
 gcc/config/i386/i386.opt  |   5 +
 gcc/config/i386/immintrin.h   |   4 +
 gcc/config/i386/sse.md| 100 -
 gcc/doc/extend.texi   |   5 +
 gcc/doc/invoke.texi   |   9 +-
 gcc/doc/sourcebuild.texi  |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |   2 +-
 gcc/testsuite/gcc.target/i386/avx-check.h |   3 +
 .../gcc.target/i386/avx-ne-convert-1.c|  45 ++
 .../i386/avx-ne-convert-vbcstnebf162ps-2.c|  54 +++
 .../i386/avx-ne-convert-vbcstnesh2ps-2.c  |  42 ++
 .../i386/avx-ne-convert-vcvtneebf162ps-2.c|  73 +
 

[PATCH 5/6] Support Intel CMPccXADD

2022-10-14 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect cmpccxadd.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_CMPCCXADD_SET,
OPTION_MASK_ISA2_CMPCCXADD_UNSET): New.
(ix86_handle_option): Handle -mcmpccxadd, unset cmpccxadd when avx2
is disabled.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_CMPCCXADD.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
cmpccxadd.
* config.gcc: Add cmpccxaddintrin.h.
* config/i386/cpuid.h (bit_CMPCCXADD): New.
* config/i386/i386-builtin-types.def:
Add DEF_FUNCTION_TYPE(INT, PINT, INT, INT, INT)
and DEF_FUNCTION_TYPE(LONGLONG, PLONGLONG, LONGLONG, LONGLONG, INT).
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__CMPCCXADD__.
* config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
Add new parameter to indicate constant position.
Handle INT_FTYPE_PINT_INT_INT_INT
and LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT.
* config/i386/i386-isa.def (CMPCCXADD): Add DEF_PTA(CMPCCXADD).
* config/i386/i386-options.cc (isa2_opts): Add -mcmpccxadd.
(ix86_valid_target_attribute_inner_p): Handle cmpccxadd.
* config/i386/i386.opt: Add option -mcmpccxadd.
* config/i386/sync.md (cmpccxadd_): New define insn.
* config/i386/x86gprintrin.h: Include cmpccxaddintrin.h.
* doc/extend.texi: Document cmpccxadd.
* doc/invoke.texi: Document -mcmpccxadd.
* doc/sourcebuild.texi: Document target cmpccxadd.
* config/i386/cmpccxaddintrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mcmpccxadd.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/avx-1.c: Add builtin define for enum.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-13.c: Add builtin define for enum.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/x86gprintrin-1.c: Add -mcmpccxadd for 64 bit target.
* gcc.target/i386/x86gprintrin-2.c: Add -mcmpccxadd for 64 bit target.
Add builtin define for enum.
* gcc.target/i386/x86gprintrin-3.c: Add -mcmpccxadd for 64 bit target.
* gcc.target/i386/x86gprintrin-4.c: Add -mcmpccxadd for 64 bit target.
* gcc.target/i386/x86gprintrin-5.c: Add -mcmpccxadd for 64 bit target.
Add builtin define for enum.
* gcc.target/i386/cmpccxadd-1.c: New test.
* gcc.target/i386/cmpccxadd-2.c: New test.
---
 gcc/common/config/i386/cpuinfo.h  |   2 +
 gcc/common/config/i386/i386-common.cc |  15 ++
 gcc/common/config/i386/i386-cpuinfo.h |   1 +
 gcc/common/config/i386/i386-isas.h|   1 +
 gcc/config.gcc|   3 +-
 gcc/config/i386/cmpccxaddintrin.h |  89 +++
 gcc/config/i386/cpuid.h   |   1 +
 gcc/config/i386/i386-builtin-types.def|   4 +
 gcc/config/i386/i386-builtin.def  |   4 +
 gcc/config/i386/i386-c.cc |   2 +
 gcc/config/i386/i386-expand.cc|  22 ++-
 gcc/config/i386/i386-isa.def  |   1 +
 gcc/config/i386/i386-options.cc   |   4 +-
 gcc/config/i386/i386.opt  |   5 +
 gcc/config/i386/sync.md   |  42 ++
 gcc/config/i386/x86gprintrin.h|   2 +
 gcc/doc/extend.texi   |   5 +
 gcc/doc/invoke.texi   |  10 +-
 gcc/doc/sourcebuild.texi  |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |   2 +-
 gcc/testsuite/gcc.target/i386/avx-1.c |   4 +
 gcc/testsuite/gcc.target/i386/cmpccxadd-1.c   |  61 
 gcc/testsuite/gcc.target/i386/cmpccxadd-2.c   | 138 ++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 gcc/testsuite/gcc.target/i386/sse-13.c|   6 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|   6 +-
 .../gcc.target/i386/x86gprintrin-1.c  |   2 +-
 .../gcc.target/i386/x86gprintrin-2.c  |   6 +-
 .../gcc.target/i386/x86gprintrin-3.c  |   2 +-
 .../gcc.target/i386/x86gprintrin-4.c  |   2 +-
 .../gcc.target/i386/x86gprintrin-5.c  |   6 +-
 gcc/testsuite/lib/target-supports.exp |  10 ++
 33 files changed, 450 insertions(+), 15 deletions(-)
 create mode 100644 gcc/config/i386/cmpccxaddintrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-2.c

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index e9fd586704d..f73834b086c 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ 

[PATCH 2/6] Support Intel AVX-VNNI-INT8

2022-10-14 Thread Haochen Jiang via Gcc-patches
From: Kong Lingling 

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features): Detect
avxvnniint8.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AVXVNNIINT8_SET): New.
(OPTION_MASK_ISA2_AVXVNNIINT8_UNSET): Ditto.
(ix86_handle_option): Handle -mavxvnniint8.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_AVXVNNIINT8.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
avxvnniint8.
* config.gcc: Add avxvnniint8intrin.h.
* config/i386/avxvnniint8intrin.h: New file.
* config/i386/cpuid.h (bit_AVXVNNIINT8): New.
* config/i386/i386-builtin.def: Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__AVXVNNIINT8__.
* config/i386/i386-options.cc (isa2_opts): Add -mavxvnniint8.
(ix86_valid_target_attribute_inner_p): Handle avxvnniint8.
* config/i386/i386-isa.def: Add DEF_PTA(AVXVNNIINT8).
* config/i386/i386.opt: Add option -mavxvnniint8.
* config/i386/immintrin.h: Include avxvnniint8intrin.h.
* config/i386/sse.md
(vpdp_): New define_insn.
* doc/extend.texi: Document avxvnniint8.
* doc/invoke.texi: Document -mavxvnniint8.
* doc/sourcebuild.texi: Document target avxvnniint8.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mavxvnniint8.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/avx-check.h: Add avxvnniint8 check.
* gcc.target/i386/sse-12.c: Add -mavxvnniint8.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* lib/target-supports.exp
(check_effective_target_avxvnniint8): New.
* gcc.target/i386/avxvnniint8-1.c: New test.
* gcc.target/i386/avxvnniint8-vpdpbssd-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbssds-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbsud-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbsuds-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbuud-2.c: Ditto.
* gcc.target/i386/avxvnniint8-vpdpbuuds-2.c: Ditto.

Co-authored-by: Hongyu Wang 
Co-authored-by: Haochen Jiang 
---
 gcc/common/config/i386/cpuinfo.h  |   2 +
 gcc/common/config/i386/i386-common.cc |  22 ++-
 gcc/common/config/i386/i386-cpuinfo.h |   1 +
 gcc/common/config/i386/i386-isas.h|   2 +
 gcc/config.gcc|   2 +-
 gcc/config/i386/avxvnniint8intrin.h   | 138 ++
 gcc/config/i386/cpuid.h   |   1 +
 gcc/config/i386/i386-builtin.def  |  14 ++
 gcc/config/i386/i386-c.cc |   2 +
 gcc/config/i386/i386-isa.def  |   1 +
 gcc/config/i386/i386-options.cc   |   4 +-
 gcc/config/i386/i386.opt  |   5 +
 gcc/config/i386/immintrin.h   |   2 +
 gcc/config/i386/sse.md|  31 
 gcc/doc/extend.texi   |   5 +
 gcc/doc/invoke.texi   |   9 +-
 gcc/doc/sourcebuild.texi  |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |   2 +-
 gcc/testsuite/gcc.target/i386/avx-check.h |   3 +
 gcc/testsuite/gcc.target/i386/avxvnniint8-1.c |  43 ++
 .../gcc.target/i386/avxvnniint8-vpdpbssd-2.c  |  72 +
 .../gcc.target/i386/avxvnniint8-vpdpbssds-2.c |  72 +
 .../gcc.target/i386/avxvnniint8-vpdpbsud-2.c  |  72 +
 .../gcc.target/i386/avxvnniint8-vpdpbsuds-2.c |  72 +
 .../gcc.target/i386/avxvnniint8-vpdpbuud-2.c  |  72 +
 .../gcc.target/i386/avxvnniint8-vpdpbuuds-2.c |  72 +
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 gcc/testsuite/gcc.target/i386/sse-12.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c|   4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|   2 +-
 gcc/testsuite/lib/target-supports.exp |  12 ++
 34 files changed, 738 insertions(+), 14 deletions(-)
 create mode 100644 gcc/config/i386/avxvnniint8intrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
 create mode 100644 

[PATCH 6/6] Initial Sierra Forest Support

2022-10-14 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_intel_cpu):
Add Sierra Forest.
* common/config/i386/i386-common.cc
(processor_names): Add Sierra Forest.
(processor_alias_table): Ditto.
* common/config/i386/i386-cpuinfo.h
(enum processor_types): Add INTEL_SIERRAFOREST.
* config.gcc: Add -march=sierraforest.
* config/i386/driver-i386.cc (host_detect_local_cpu):
Handle Sierra Forest.
* config/i386/i386-c.cc (ix86_target_macros_internal):
Ditto.
* config/i386/i386-options.cc (m_SIERRAFOREST): New define.
(processor_cost_table): Add sierra forest.
* config/i386/i386.h (enum processor_type):
Add PROCESSOR_SIERRAFOREST.
(PTA_SIERRAFOREST): Ditto.
* doc/extend.texi: Add sierra forest.
* doc/invoke.texi: Ditto.

gcc/testsuite/ChangeLog:

* g++.target/i386/mv16.C: Add sierra forest.
* gcc.target/i386/funcspec-56.inc: Handle new march.
---
 gcc/common/config/i386/cpuinfo.h  | 6 ++
 gcc/common/config/i386/i386-common.cc | 3 +++
 gcc/common/config/i386/i386-cpuinfo.h | 1 +
 gcc/config.gcc| 3 ++-
 gcc/config/i386/driver-i386.cc| 5 -
 gcc/config/i386/i386-c.cc | 7 +++
 gcc/config/i386/i386-options.cc   | 2 ++
 gcc/config/i386/i386.h| 3 +++
 gcc/doc/extend.texi   | 3 +++
 gcc/doc/invoke.texi   | 8 
 gcc/testsuite/g++.target/i386/mv16.C  | 6 ++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc | 1 +
 12 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index f73834b086c..cc499c46ed0 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -516,6 +516,12 @@ get_intel_cpu (struct __processor_model *cpu_model,
   cpu_model->__cpu_type = INTEL_COREI7;
   cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS;
   break;
+case 0xaf:
+  /* Sierra Forest.  */
+  cpu = "sierraforest";
+  CHECK___builtin_cpu_is ("sierraforest");
+  cpu_model->__cpu_type = INTEL_SIERRAFOREST;
+  break;
 case 0x17:
 case 0x1d:
   /* Penryn.  */
diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index 75966779d82..6ccc4d2f03c 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -1874,6 +1874,7 @@ const char *const processor_names[] =
   "goldmont",
   "goldmont-plus",
   "tremont",
+  "sierraforest",
   "knl",
   "knm",
   "skylake",
@@ -2019,6 +2020,8 @@ const pta processor_alias_table[] =
 M_CPU_TYPE (INTEL_GOLDMONT_PLUS), P_PROC_SSE4_2},
   {"tremont", PROCESSOR_TREMONT, CPU_HASWELL, PTA_TREMONT,
 M_CPU_TYPE (INTEL_TREMONT), P_PROC_SSE4_2},
+  {"sierraforest", PROCESSOR_SIERRAFOREST, CPU_HASWELL, PTA_SIERRAFOREST,
+M_CPU_SUBTYPE (INTEL_SIERRAFOREST), P_PROC_AVX2},
   {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL,
 M_CPU_TYPE (INTEL_KNL), P_PROC_AVX512F},
   {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM,
diff --git a/gcc/common/config/i386/i386-cpuinfo.h 
b/gcc/common/config/i386/i386-cpuinfo.h
index 5a61d817007..a71a10ebbd7 100644
--- a/gcc/common/config/i386/i386-cpuinfo.h
+++ b/gcc/common/config/i386/i386-cpuinfo.h
@@ -58,6 +58,7 @@ enum processor_types
   INTEL_TREMONT,
   AMDFAM19H,
   ZHAOXIN_FAM7H,
+  INTEL_SIERRAFOREST,
   CPU_TYPE_MAX,
   BUILTIN_CPU_TYPE_MAX = CPU_TYPE_MAX
 };
diff --git a/gcc/config.gcc b/gcc/config.gcc
index fe063bfbb26..c0e10a72bd5 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -665,7 +665,8 @@ slm nehalem westmere sandybridge ivybridge haswell 
broadwell bonnell \
 silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
 skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
 sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 
\
-nano-x2 eden-x4 nano-x4 lujiazui x86-64 x86-64-v2 x86-64-v3 x86-64-v4 native"
+nano-x2 eden-x4 nano-x4 lujiazui x86-64 x86-64-v2 x86-64-v3 x86-64-v4 \
+sierraforest native"
 
 # Additional x86 processors supported by --with-cpu=.  Each processor
 # MUST be separated by exactly one space.
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index ef567045c67..be205a56ea2 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -589,8 +589,11 @@ const char *host_detect_local_cpu (int argc, const char 
**argv)
  /* This is unknown family 0x6 CPU.  */
  if (has_feature (FEATURE_AVX))
{
+ /* Assume Sierra Forest.  */
+ if (has_feature (FEATURE_AVXVNNIINT8))
+   cpu = "sierraforest";
  /* Assume Tiger Lake */
- if (has_feature 

[PATCH 3/6] i386: Add intrinsic for vector __bf16

2022-10-14 Thread Haochen Jiang via Gcc-patches
From: konglin1 

gcc/ChangeLog:

* config/i386/avx512fp16intrin.h : New intrinsic.
(_mm_load_sbf16): Ditto.
(_mm_mask_load_sbf16): Ditto.
(_mm_maskz_load_sbf16): Ditto.
(_mm_mask_store_sbf16): Ditto.
(_mm_mask_move_sbf16): Ditto.
(_mm_maskz_move_sbf16): Ditto.
* config/i386/avx512bf16intrin.h: New intrinsic.
(_mm_setzero_pbf16): Ditto.
(_mm256_setzero_pbf16): Ditto.
(_mm512_setzero_pbf16): Ditto.
(_mm512_undefined_pbf16): Ditto.
(_mm512_set1_pbf16): Ditto.
(_mm512_set_pbf16): Ditto.
(_mm512_setr_pbf16): Ditto.
(_mm_castpbf16_ps): Ditto.
(_mm256_castpbf16_ps): Ditto.
(_mm512_castpbf16_ps): Ditto.
(_mm_castpbf16_pd): Ditto.
(_mm256_castpbf16_pd): Ditto.
(_mm512_castpbf16_pd): Ditto.
(_mm_castpbf16_si128): Ditto.
(_mm256_castpbf16_si256): Ditto.
(_mm512_castpbf16_si512): Ditto.
(_mm_castps_pbf16): Ditto.
(_mm256_castps_pbf16): Ditto.
(_mm512_castps_pbf16): Ditto.
(_mm_castpd_pbf16): Ditto.
(_mm256_castpd_pbf16): Ditto.
(_mm512_castpd_pbf16): Ditto.
(_mm_castsi128_pbf16): Ditto.
(_mm256_castsi256_pbf16): Ditto.
(_mm512_castsi512_pbf16): Ditto.
(_mm256_castpbf16256_pbf16128): Ditto.
(_mm512_castpbf16512_pbf16128): Ditto.
(_mm512_castpbf16512_pbf16256): Ditto.
(_mm256_castpbf16128_pbf16256): Ditto.
(_mm512_castpbf16128_pbf16512): Ditto.
(_mm512_castpbf16256_pbf16512): Ditto.
(_mm256_zextpbf16128_pbf16256): Ditto.
(_mm512_zextpbf16128_pbf16512): Ditto.
(_mm512_zextpbf16256_pbf16512): Ditto.
(_mm512_abs_pbf16): Ditto.
(_mm512_load_pbf16): Ditto.
(_mm256_load_pbf16): Ditto.
(_mm_load_pbf16): Ditto.
(_mm512_loadu_pbf16): Ditto.
(_mm256_loadu_pbf16): Ditto.
(_mm_loadu_pbf16): Ditto.
(_mm_store_sbf16): Ditto.
(_mm512_store_pbf16): Ditto.
(_mm256_store_pbf16): Ditto.
(_mm_store_pbf16): Ditto.
(_mm512_storeu_pbf16): Ditto.
(_mm256_storeu_pbf16): Ditto.
(_mm_storeu_pbf16): Ditto.
(_mm_move_sbf16): Ditto.
(_mm512_mask_blend_pbf16): Ditto.
(_mm512_permutex2var_pbf16): Ditto.
(_mm512_permutexvar_pbf16): Ditto.
(_mm512_bcstnebf16_ps): Ditto.
(_mm512_mask_bcstnebf16_ps): Ditto.
(_mm512_bcstnesh_ps): Ditto.
(_mm512_mask_bcstnesh_ps): Ditto.
(_mm512_maskz_bcstnesh_ps): Ditto.
(_mm512_cvtne2ps_ph): Ditto.
(_mm512_mask_cvtne2ps_ph): Ditto.
(_mm512_cvtne_round2ps_ph): Ditto.
(_mm512_mask_cvtne_round2ps_ph): Ditto.
(_mm512_cvtneebf16_ps): Ditto.
(_mm512_mask_cvtneebf16_ps): Ditto.
(_mm512_maskz_cvtneebf16_ps): Ditto.
(_mm512_cvtneeph_ps): Ditto.
(_mm512_mask_cvtneeph_ps): Ditto.
(_mm512_cvtneobf16_ps): Ditto.
(_mm512_mask_cvtneobf16_ps): Ditto.
(_mm512_maskz_cvtneobf16_ps): Ditto.
(_mm512_cvtneoph_ps): Ditto.
(_mm512_mask_cvtneoph_ps): Ditto.
* config/i386/avx512bf16vlintrin.h (__attribute__): Ditto.
(_mm_cvtsbf16_bf16): Ditto.
(_mm256_cvtsbf16_bf16): Ditto.
(_mm256_undefined_pbf16): Ditto.
(_mm_undefined_pbf16): Ditto.
(_mm_set_sbf16): Ditto.
(_mm_set1_pbf16): Ditto.
(_mm256_set1_pbf16): Ditto.
(_mm_set_pbf16): Ditto.
(_mm256_set_pbf16): Ditto.
(_mm_setr_pbf16): Ditto.
(_mm256_setr_pbf16): Ditto.
(_mm256_abs_pbf16): Ditto.
(_mm_abs_pbf16): Ditto.
(_mm_mask_blend_pbf16): Ditto.
(_mm256_mask_blend_pbf16): Ditto.
(_mm_permutex2var_pbf16): Ditto.
(_mm256_permutex2var_pbf16): Ditto.
(_mm_permutexvar_pbf16): Ditto.
(_mm256_permutexvar_pbf16): Ditto.
(_mm_cvtneebf16_ps): Change bf16 mode.
(_mm256_cvtneebf16_ps): Ditto.
(_mm_cvtneobf16_ps): Ditto.
(_mm256_cvtneobf16_ps): Ditto.
(_mm_mask_cvtneebf16_ps): Ditto.
(_mm_maskz_cvtneebf16_ps): Ditto.
(_mm256_mask_cvtneebf16_ps): Ditto.
(_mm256_maskz_cvtneebf16_ps): Ditto.
(_mm_mask_cvtneobf16_ps): Ditto.
(_mm_maskz_cvtneobf16_ps): Ditto.
(_mm256_mask_cvtneobf16_ps): Ditto.
(_mm256_maskz_cvtneobf16_ps): Ditto.
* config/i386/immintrin.h: Add SSE2 depend for avx512bf16.
---
 gcc/config/i386/avx512bf16intrin.h   | 418 +++
 gcc/config/i386/avx512bf16vlintrin.h | 177 
 gcc/config/i386/avx512fp16intrin.h   |  70 +
 gcc/config/i386/immintrin.h  |   2 +
 4 files changed, 667 insertions(+)

diff --git a/gcc/config/i386/avx512bf16intrin.h 
b/gcc/config/i386/avx512bf16intrin.h
index b6e9ddad157..d09a59c1509 100644
--- a/gcc/config/i386/avx512bf16intrin.h
+++ 

[PATCH 1/6] Support Intel AVX-IFMA

2022-10-14 Thread Haochen Jiang via Gcc-patches
From: Hongyu Wang 

gcc/
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA_AVXIFMA_SET, OPTION_MASK_ISA2_AVXIFMA_UNSET,
OPTION_MASK_ISA2_AVX2_UNSET): New macro.
(ix86_handle_option): Handle -mavxifma.
* common/config/i386/i386-cpuinfo.h (processor_types): Add
FEATURE_AVXIFMA.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
avxifma.
* common/config/i386/cpuinfo.h (get_available_features):
Detect avxifma.
* config.gcc: Add avxifmaintrin.h
* config/i386/avxifmaintrin.h: New.
* config/i386/cpuid.h (bit_AVXIFMA): New.
* config/i386/i386-builtin.def: Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
__AVXIFMA__.
* config/i386/i386-options.cc (isa2_opts): Add -mavxifma.
(ix86_valid_target_attribute_inner_p): Handle avxifma.
* config/i386/i386.h (TARGET_AVXIFMA, TARGET_AVXIFMA_P,
PTA_AVXIFMA): New.
* config/i386/i386.opt: Add option -mavxifma.
* config/i386/immintrin.h: Include avxifmaintrin.h.
* config/i386/sse.md
(vpamdd52): Remove.
(avx_vpmadd52_,
vpamdd52,
vpamdd52_maskz_1): New define_insn.
* doc/invoke.texi: Document -mavxifma.
* doc/extend.texi: Document avxifma.
* doc/sourcebuild.texi: Document target avxifma.

gcc/testsuite/
* gcc.target/i386/avx512ifma-vpmaddhuq-1.c: Rename...
* gcc.target/i386/avx512ifma-vpmaddhuq-1a.c: To this.
* gcc.target/i386/avx512ifma-vpmaddluq-1.c: Ditto.
* gcc.target/i386/avx512ifma-vpmaddluq-1a.c: Ditto.
* gcc.target/i386/avx512vl-vpmaddhuq-2.c: Ditto.
* gcc.target/i386/avx512vl-vpmaddhuq-2a.c: Ditto.
* gcc.target/i386/avx512vl-vpmaddluq-2.c: Ditto.
* gcc.target/i386/avx512vl-vpmaddluq-2a.c: Ditto.
* gcc.target/i386/avx-check.h: Add avxifma check.
* gcc.target/i386/avx512ifma-vpmaddhuq-1b.c: New Test.
* gcc.target/i386/avx512ifma-vpmaddluq-1b.c: Ditto.
* gcc.target/i386/avx512vl-vpmaddhuq-2b.c: Ditto.
* gcc.target/i386/avx512vl-vpmaddluq-2b.c: Ditto.
* gcc.target/i386/avx-ifma-1.c: Ditto.
* gcc.target/i386/avx-ifma-vpmaddhuq-2.c: Ditto.
* gcc.target/i386/avx-ifma-vpmaddluq-2.c: Ditto.
* gcc.target/i386/sse-12.c: Add -mavxifma.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* g++.dg/other/i386-2.C: Ditto.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/builtin_target.c: Detect avxifma.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* lib/target-supports.exp
(check_effective_target_avxifma): New.
---
 gcc/common/config/i386/cpuinfo.h  |  2 +
 gcc/common/config/i386/i386-common.cc | 20 -
 gcc/common/config/i386/i386-cpuinfo.h |  1 +
 gcc/common/config/i386/i386-isas.h|  1 +
 gcc/config.gcc|  3 +-
 gcc/config/i386/avxifmaintrin.h   | 78 +++
 gcc/config/i386/cpuid.h   |  1 +
 gcc/config/i386/i386-builtin.def  |  6 ++
 gcc/config/i386/i386-c.cc |  2 +
 gcc/config/i386/i386-isa.def  |  1 +
 gcc/config/i386/i386-options.cc   |  4 +-
 gcc/config/i386/i386.opt  |  5 ++
 gcc/config/i386/immintrin.h   |  2 +
 gcc/config/i386/sse.md| 42 +-
 gcc/doc/extend.texi   |  5 ++
 gcc/doc/invoke.texi   |  9 ++-
 gcc/doc/sourcebuild.texi  |  3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |  2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |  2 +-
 gcc/testsuite/gcc.target/i386/avx-check.h |  6 +-
 gcc/testsuite/gcc.target/i386/avx-ifma-1.c| 20 +
 .../gcc.target/i386/avx-ifma-vpmaddhuq-2.c| 72 +
 .../gcc.target/i386/avx-ifma-vpmaddluq-2.c| 61 +++
 ...pmaddhuq-1.c => avx512ifma-vpmaddhuq-1a.c} |  0
 .../gcc.target/i386/avx512ifma-vpmaddhuq-1b.c | 33 
 ...pmaddluq-1.c => avx512ifma-vpmaddluq-1a.c} |  0
 .../gcc.target/i386/avx512ifma-vpmaddluq-1b.c | 33 
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |  2 +
 gcc/testsuite/gcc.target/i386/sse-12.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c|  2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c|  4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|  2 +-
 gcc/testsuite/lib/target-supports.exp | 12 +++
 34 files changed, 423 insertions(+), 17 deletions(-)
 create mode 100644 gcc/config/i386/avxifmaintrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-ifma-1.c
 create mode 100644 

[PATCH 0/6] Add Intel Sierra Forest Instructions

2022-10-14 Thread Haochen Jiang via Gcc-patches
Hi all,

These six patches aim to add the Intel Sierra Forest instructions, including
AVX-IFMA, AVX-VNNI-INT8, AVX-NE-CONVERT and CMPccXADD. This series also adds
intrinsics for vector __bf16 and Sierra Forest support.

The information is based on newly released
Intel Architecture Instruction Set Extensions and Future Features.

The document is available at:
https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen




[PATCH 1/2] Initial Raptorlake Support

2022-10-14 Thread Haochen Jiang via Gcc-patches
gcc/ChangeLog:

* common/config/i386/cpuinfo.h:
(get_intel_cpu): Handle Raptorlake.
* common/config/i386/i386-common.cc:
(processor_alias_table): Add Raptorlake.
---
 gcc/common/config/i386/cpuinfo.h  | 2 ++
 gcc/common/config/i386/i386-common.cc | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index bbced8a23b9..e759e6f89fa 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -496,6 +496,8 @@ get_intel_cpu (struct __processor_model *cpu_model,
 case 0x9a:
 case 0xbf:
   /* Alder Lake.  */
+case 0xb7:
+  /* Raptor Lake.  */
   cpu = "alderlake";
   CHECK___builtin_cpu_is ("corei7");
   CHECK___builtin_cpu_is ("alderlake");
diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index c0c2ad74d87..8d346245ddd 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -1929,6 +1929,8 @@ const pta processor_alias_table[] =
 M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
   {"alderlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
 M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+  {"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
+M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
 M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
-- 
2.18.1



[PATCH 0/2] Add new Intel Architecture

2022-10-14 Thread Haochen Jiang via Gcc-patches
Hi all,

These two patches aim to add new Intel processors according to the newly
released Intel Architecture Instruction Set Extensions and Future Features.

The document is available at:
https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen





[PATCH 2/2] Initial Meteorlake Support

2022-10-14 Thread Haochen Jiang via Gcc-patches
From: "Hu, Lin1" 

gcc/ChangeLog:

* common/config/i386/cpuinfo.h:
(get_intel_cpu): Handle Meteorlake.
* common/config/i386/i386-common.cc:
(processor_alias_table): Add Meteorlake.
---
 gcc/common/config/i386/cpuinfo.h  | 4 
 gcc/common/config/i386/i386-common.cc | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index e759e6f89fa..b5c1b21e554 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -498,6 +498,10 @@ get_intel_cpu (struct __processor_model *cpu_model,
   /* Alder Lake.  */
 case 0xb7:
   /* Raptor Lake.  */
+case 0xb5:
+case 0xaa:
+case 0xac:
+  /* Meteor Lake.  */
   cpu = "alderlake";
   CHECK___builtin_cpu_is ("corei7");
   CHECK___builtin_cpu_is ("alderlake");
diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index 8d346245ddd..d6a68dc9b1d 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -1931,6 +1931,8 @@ const pta processor_alias_table[] =
 M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
   {"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
 M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
+  {"meteorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
+M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
   {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
 M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
   {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
-- 
2.18.1



[PATCH] Add __m128bf16/__m256bf16/__m512bf16 type for bf16 abi test

2022-08-21 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch adds the __m128bf16/__m256bf16/__m512bf16 types to the testcases.

BRs,
Haochen

gcc/testsuite/ChangeLog:

* gcc.target/x86_64/abi/bf16/bf16-helper.h:
Add __m128bf16/__m256bf16/__m512bf16.
* gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h:
Include bf16-helper.h.
---
 gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-helper.h| 4 
 .../gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h  | 1 +
 2 files changed, 5 insertions(+)

diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-helper.h 
b/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-helper.h
index 83d89fcf62c..e090a7254f4 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-helper.h
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-helper.h
@@ -1,3 +1,7 @@
+typedef __bf16 __m128bf16 __attribute__((__vector_size__(16), 
__aligned__(16)));
+typedef __bf16 __m256bf16 __attribute__((__vector_size__(32), 
__aligned__(32)));
+typedef __bf16 __m512bf16 __attribute__((__vector_size__(64), 
__aligned__(64)));
+
 typedef union
 {
   float f;
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h 
b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h
index 8379fcfaf8c..9cd39b878dd 100644
--- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h
@@ -1,4 +1,5 @@
 #include 
+#include "../bf16-helper.h"
 
 static void do_test (void);
 
-- 
2.18.1



[PATCH] Add ABI test for __bf16 type

2022-08-18 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch aims to add bf16 abi test after the whole __bf16 type is added.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/testsuite/ChangeLog:

* gcc.target/x86_64/abi/bf16/abi-bf16.exp: New test.
* gcc.target/x86_64/abi/bf16/args.h: Ditto.
* gcc.target/x86_64/abi/bf16/asm-support.S: Ditto.
* gcc.target/x86_64/abi/bf16/bf16-check.h: Ditto.
* gcc.target/x86_64/abi/bf16/bf16-helper.h: Ditto.
* gcc.target/x86_64/abi/bf16/defines.h: Ditto.
* gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp: Ditto.
* gcc.target/x86_64/abi/bf16/m256bf16/args.h: Ditto.
* gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S: Ditto.
* gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h: Ditto.
* gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c: Ditto.
* gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c: Ditto.
* gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c: Ditto.
* gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c: Ditto.
* gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c: Ditto.
* gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp: Ditto.
* gcc.target/x86_64/abi/bf16/m512bf16/args.h: Ditto.
* gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S: Ditto.
* gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h: Ditto.
* gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c: Ditto.
* gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c: Ditto.
* gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c: Ditto.
* gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c: Ditto.
* gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c: Ditto.
* gcc.target/x86_64/abi/bf16/macros.h: Ditto.
* gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c: Ditto.
* gcc.target/x86_64/abi/bf16/test_basic_alignment.c: Ditto.
* gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c: Ditto.
* gcc.target/x86_64/abi/bf16/test_basic_returning.c: Ditto.
* gcc.target/x86_64/abi/bf16/test_basic_sizes.c: Ditto.
* gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c: Ditto.
* gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c: Ditto.
* gcc.target/x86_64/abi/bf16/test_m128_returning.c: Ditto.
* gcc.target/x86_64/abi/bf16/test_passing_floats.c: Ditto.
* gcc.target/x86_64/abi/bf16/test_passing_m128.c: Ditto.
* gcc.target/x86_64/abi/bf16/test_passing_structs.c: Ditto.
* gcc.target/x86_64/abi/bf16/test_passing_unions.c: Ditto.
* gcc.target/x86_64/abi/bf16/test_struct_returning.c: Ditto.
* gcc.target/x86_64/abi/bf16/test_varargs-m128.c: Ditto.
---
 .../gcc.target/x86_64/abi/bf16/abi-bf16.exp   |  46 +++
 .../gcc.target/x86_64/abi/bf16/args.h | 164 +
 .../gcc.target/x86_64/abi/bf16/asm-support.S  |  84 +
 .../gcc.target/x86_64/abi/bf16/bf16-check.h   |  24 ++
 .../gcc.target/x86_64/abi/bf16/bf16-helper.h  |  41 +++
 .../gcc.target/x86_64/abi/bf16/defines.h  | 163 +
 .../x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp |  46 +++
 .../x86_64/abi/bf16/m256bf16/args.h   | 152 +
 .../x86_64/abi/bf16/m256bf16/asm-support.S|  84 +
 .../x86_64/abi/bf16/m256bf16/bf16-ymm-check.h |  24 ++
 .../abi/bf16/m256bf16/test_m256_returning.c   |  38 +++
 .../abi/bf16/m256bf16/test_passing_m256.c | 235 +
 .../abi/bf16/m256bf16/test_passing_structs.c  |  69 
 .../abi/bf16/m256bf16/test_passing_unions.c   | 179 ++
 .../abi/bf16/m256bf16/test_varargs-m256.c | 107 ++
 .../x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp |  46 +++
 .../x86_64/abi/bf16/m512bf16/args.h   | 155 +
 .../x86_64/abi/bf16/m512bf16/asm-support.S| 100 ++
 .../x86_64/abi/bf16/m512bf16/bf16-zmm-check.h |  23 ++
 .../abi/bf16/m512bf16/test_m512_returning.c   |  44 +++
 .../abi/bf16/m512bf16/test_passing_m512.c | 243 ++
 .../abi/bf16/m512bf16/test_passing_structs.c  |  77 +
 .../abi/bf16/m512bf16/test_passing_unions.c   | 222 +
 .../abi/bf16/m512bf16/test_varargs-m512.c | 111 +++
 .../gcc.target/x86_64/abi/bf16/macros.h   |  53 +++
 .../bf16/test_3_element_struct_and_unions.c   | 214 
 .../x86_64/abi/bf16/test_basic_alignment.c|  14 +
 .../bf16/test_basic_array_size_and_align.c|  13 +
 .../x86_64/abi/bf16/test_basic_returning.c|  20 ++
 .../x86_64/abi/bf16/test_basic_sizes.c|  14 +
 .../bf16/test_basic_struct_size_and_align.c   |  14 +
 .../bf16/test_basic_union_size_and_align.c|  12 +
 .../x86_64/abi/bf16/test_m128_returning.c |  38 +++
 .../x86_64/abi/bf16/test_passing_floats.c | 312 ++
 .../x86_64/abi/bf16/test_passing_m128.c   | 238 +
 .../x86_64/abi/bf16/test_passing_structs.c

[PATCH] i386: Handle memory operand for direct call to cvtps2pd in unpack

2022-07-06 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch aims to fix the ICE that occurs when vec unpack is used on a memory
operand after commit r13-1418, caused by an improper cvtps2pd insn.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

PR target/106180
* config/i386/sse.md (sse2_cvtps2pd_1):
Rename from *sse2_cvtps2pd_1.
(vec_unpacks_lo_v4sf): Add handler for memory operand.

gcc/testsuite/ChangeLog:

PR target/106180
* g++.target/i386/pr106180-1.C: New test.
---
 gcc/config/i386/sse.md | 12 +++--
 gcc/testsuite/g++.target/i386/pr106180-1.C | 31 ++
 2 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr106180-1.C

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3396ff748da..5b91c7be54e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -9208,7 +9208,7 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V2DF")])
 
-(define_insn "*sse2_cvtps2pd_1"
+(define_insn "sse2_cvtps2pd_1"
   [(set (match_operand:V2DF 0 "register_operand" "=v")
(float_extend:V2DF
  (match_operand:V2SF 1 "memory_operand" "m")))]
@@ -9270,7 +9270,15 @@
  (vec_select:V2SF
(match_operand:V4SF 1 "vector_operand")
(parallel [(const_int 0) (const_int 1)]]
-  "TARGET_SSE2")
+  "TARGET_SSE2"
+{
+  if (MEM_P (operands[1]))
+{
+  operands[1] = adjust_address_nv (operands[1], V2SFmode, 0);
+  emit_insn (gen_sse2_cvtps2pd_1 (operands[0], operands[1]));
+  DONE;
+}
+})
 
 (define_expand "vec_unpacks_lo_v8sf"
   [(set (match_operand:V4DF 0 "register_operand")
diff --git a/gcc/testsuite/g++.target/i386/pr106180-1.C 
b/gcc/testsuite/g++.target/i386/pr106180-1.C
new file mode 100644
index 000..7f734536001
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr106180-1.C
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -c -ffloat-store  -std=c++11" } */
+
+struct PointT 
+{
+  double x, y;
+};
+using PointF = PointT;
+
+template  struct __array_traits { typedef PointT _Type[_Nm]; };
+template  struct array
+{
+  typename __array_traits<_Nm>::_Type _M_elems;
+};
+
+float SampleGrid_low, SampleGrid_high;
+using QuadrilateralF = array<4>;
+struct PerspectiveTransform
+{
+  PerspectiveTransform (QuadrilateralF, QuadrilateralF);
+};
+
+void SampleGrid()
+{
+  PerspectiveTransform
+  {
+{ PointF {SampleGrid_high, SampleGrid_low},
+  SampleGrid_low, SampleGrid_high },
+{}
+  };
+}
-- 
2.18.2



[PATCH] Add myself for write after approval

2022-06-30 Thread Haochen Jiang via Gcc-patches
Hi all,

I want to add myself to MAINTAINERS under Write After Approval.

Ok for trunk?

BRs,
Haochen

ChangeLog:

* MAINTAINERS (Write After Approval): Add myself.
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 151770f59f4..3c448ba9eb6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -464,6 +464,7 @@ Harsha Jagasia  

 Fariborz Jahanian  
 Surya Kumari Jangala   
 Qian Jianhua   
+Haochen Jiang  
 Janis Johnson  
 Teresa Johnson 
 Kean Johnston  
-- 
2.18.1



[PATCH] i386: Extend cvtps2pd to memory

2022-06-29 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch aims to fix the cvtps2pd insn, which should also work on a
memory operand but currently does not. After this fix, when loop == 2,
the movq instruction is eliminated.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

PR target/43618
* config/i386/sse.md (extendv2sfv2df2): New define_expand.
(sse2_cvtps2pd_load): Rename from extendv2sfv2df2.

gcc/testsuite/ChangeLog:

PR target/43618
* gcc.target/i386/pr43618-1.c: New test.
---
 gcc/config/i386/sse.md| 24 ++-
 gcc/testsuite/gcc.target/i386/pr43618-1.c | 13 
 2 files changed, 32 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr43618-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8b2602bfa79..f96bb3dc6c3 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -9175,11 +9175,25 @@
(set_attr "prefix" "evex")
(set_attr "mode" "")])
 
+(define_expand "extendv2sfv2df2"
+  [(set (match_operand:V2DF 0 "register_operand")
+(float_extend:V2DF
+  (match_operand:V2SF 1 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+{
+  if (!MEM_P (operands[1]))
+{
+  operands[1] = lowpart_subreg (V4SFmode, operands[1], V2SFmode);
+  emit_insn (gen_sse2_cvtps2pd (operands[0], operands[1]));
+  DONE;
+}
+})
+
 (define_insn "sse2_cvtps2pd"
   [(set (match_operand:V2DF 0 "register_operand" "=v")
(float_extend:V2DF
  (vec_select:V2SF
-   (match_operand:V4SF 1 "vector_operand" "vm")
+   (match_operand:V4SF 1 "register_operand" "v")
(parallel [(const_int 0) (const_int 1)]]
   "TARGET_SSE2 && "
   "%vcvtps2pd\t{%1, %0|%0, %q1}"
@@ -9191,12 +9205,12 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V2DF")])
 
-(define_insn "extendv2sfv2df2"
+(define_insn "sse2_cvtps2pd_load"
   [(set (match_operand:V2DF 0 "register_operand" "=v")
(float_extend:V2DF
- (match_operand:V2SF 1 "register_operand" "v")))]
-  "TARGET_MMX_WITH_SSE"
-  "%vcvtps2pd\t{%1, %0|%0, %1}"
+ (match_operand:V2SF 1 "memory_operand" "m")))]
+  "TARGET_MMX_WITH_SSE && "
+  "%vcvtps2pd\t{%1, %0|%0, %q1}"
   [(set_attr "type" "ssecvt")
(set_attr "amdfam10_decode" "direct")
(set_attr "athlon_decode" "double")
diff --git a/gcc/testsuite/gcc.target/i386/pr43618-1.c 
b/gcc/testsuite/gcc.target/i386/pr43618-1.c
new file mode 100644
index 000..3c84ea444aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr43618-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movq" } } */
+/* { dg-final { scan-assembler "cvtps2pd" } } */
+
+void
+foo (float a[2], double b[2])
+{
+int i;
+for (i = 0; i < 2; i++)
+  b[i] = a[i];
+}
+
-- 
2.18.1



[PATCH] i386: Add AVX512BW to AVX512F in MASK_ISA2

2022-06-29 Thread Haochen Jiang via Gcc-patches
Hi all,

I just found that in the MASK_ISA2_UNSET part, since AVX512BW is based on
AVX512F, we should add OPTION_MASK_ISA2_AVX512BW_UNSET to AVX512F for
maintenance convenience and logical correctness; otherwise we will need to add
all future ISAs based on AVX512BW to both AVX512F and AVX512BW. This is easily
forgotten and might cause confusion.

Also remove the redundant ones in this change.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AVX512F_UNSET):
Add OPTION_MASK_ISA2_AVX512BW_UNSET, remove
OPTION_MASK_ISA2_AVX512BF16_UNSET and
OPTION_MASK_ISA2_AVX512FP16_UNSET.
---
 gcc/common/config/i386/i386-common.cc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index cb878163492..c0c2ad74d87 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -315,11 +315,10 @@ along with GCC; see the file COPYING3.  If not see
| OPTION_MASK_ISA_SSE_UNSET)
 
 #define OPTION_MASK_ISA2_AVX512F_UNSET \
-  (OPTION_MASK_ISA2_AVX512BF16_UNSET \
+  (OPTION_MASK_ISA2_AVX512BW_UNSET \
| OPTION_MASK_ISA2_AVX5124FMAPS_UNSET \
| OPTION_MASK_ISA2_AVX5124VNNIW_UNSET \
-   | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET \
-   | OPTION_MASK_ISA2_AVX512FP16_UNSET)
+   | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET)
 #define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
   OPTION_MASK_ISA2_SSE_UNSET
 #define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET
-- 
2.18.1



[PATCH] i386: Add syscall to enable AMX for latest kernels

2022-06-20 Thread Haochen Jiang via Gcc-patches
From: "Jiang, Haochen" 

Hi all,

We need a syscall to enable AMX for kernels>=5.4. It is missing from the current
amx tests, which causes the tests to fail.

This patch aims to add it to fix this bug.

BRs,
Haochen

gcc/testsuite/ChangeLog:

* gcc.target/i386/amx-check.h (request_perm_xtile_data):
New function to check if AMX is usable and enable AMX.
(main): Run test if AMX is usable.
---
 gcc/testsuite/gcc.target/i386/amx-check.h | 24 +++
 1 file changed, 24 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h 
b/gcc/testsuite/gcc.target/i386/amx-check.h
index 434b0e59703..92ed8669304 100644
--- a/gcc/testsuite/gcc.target/i386/amx-check.h
+++ b/gcc/testsuite/gcc.target/i386/amx-check.h
@@ -4,11 +4,22 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #ifdef DEBUG
 #include 
 #endif
 #include "cpuid.h"
 
+#define XFEATURE_XTILECFG  17
+#define XFEATURE_XTILEDATA 18
+#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG)
+#define XFEATURE_MASK_XTILEDATA(1 << XFEATURE_XTILEDATA)
+#define XFEATURE_MASK_XTILE(XFEATURE_MASK_XTILECFG | 
XFEATURE_MASK_XTILEDATA)
+
+#define ARCH_GET_XCOMP_PERM0x1022
+#define ARCH_REQ_XCOMP_PERM0x1023
+
 /* TODO: The tmm emulation is temporary for current
AMX implementation with no tmm regclass, should
be changed in the future. */
@@ -44,6 +55,18 @@ typedef struct __tile
 /* Stride (colum width in byte) used for tileload/store */
 #define _STRIDE 64
 
+/* We need syscall to use amx functions */
+int request_perm_xtile_data()
+{
+  unsigned long bitmask;
+
+  if (syscall (SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA) ||
+  syscall (SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask))
+return 0;
+
+  return (bitmask & XFEATURE_MASK_XTILE) != 0;
+}
+
 /* Initialize tile config by setting all tmm size to 16x64 */
 void init_tile_config (__tilecfg_u *dst)
 {
@@ -186,6 +209,7 @@ main ()
 #ifdef AMX_BF16
   && __builtin_cpu_supports ("amx-bf16")
 #endif
+  && request_perm_xtile_data ()
   )
 {
   DO_TEST ();
-- 
2.18.2



[PATCH] [i386]Add combine splitter to transform pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.

2022-05-06 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch aims to add a combine splitter to transform 
pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

PR target/104371
* config/i386/sse.md: Add new define_mode_attr and define_split.

gcc/testsuite/ChangeLog:

PR target/104371
* gcc.target/i386/pr104371-1.c: New test.
* gcc.target/i386/pr104371-2.c: Ditto.
---
 gcc/config/i386/sse.md | 19 +++
 gcc/testsuite/gcc.target/i386/pr104371-1.c | 14 ++
 gcc/testsuite/gcc.target/i386/pr104371-2.c | 14 ++
 3 files changed, 47 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr104371-1.c
 create mode 100755 gcc/testsuite/gcc.target/i386/pr104371-2.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7b791def542..71afda73c8f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20083,6 +20083,25 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
 
+;; Optimize pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.
+(define_mode_attr vi1avx2const
+  [(V32QI "0xffffffff") (V16QI "0xffff")])
+
+(define_split
+  [(set (reg:CCZ FLAGS_REG)
+   (compare:CCZ (unspec:SI
+   [(eq:VI1_AVX2
+   (match_operand:VI1_AVX2 0 "vector_operand")
+   (match_operand:VI1_AVX2 1 "const0_operand"))]
+   UNSPEC_MOVMSK)
+(match_operand 2 "const_int_operand")))]
+  "TARGET_SSE4_1 && ix86_match_ccmode (insn, CCmode)
+  && (INTVAL (operands[2]) == (int) (<vi1avx2const>))"
+  [(set (reg:CC FLAGS_REG)
+   (unspec:CC [(match_dup 0)
+   (match_dup 0)]
+  UNSPEC_PTEST))])
+
 (define_expand "sse2_maskmovdqu"
   [(set (match_operand:V16QI 0 "memory_operand")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
diff --git a/gcc/testsuite/gcc.target/i386/pr104371-1.c 
b/gcc/testsuite/gcc.target/i386/pr104371-1.c
new file mode 100644
index 000..df7c0b074e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr104371-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4" } */
+/* { dg-final { scan-assembler "ptest\[ \\t\]" } } */
+/* { dg-final { scan-assembler-not "pxor\[ \\t\]" } } */
+/* { dg-final { scan-assembler-not "pcmpeqb\[ \\t\]" } } */
+/* { dg-final { scan-assembler-not "pmovmskb\[ \\t\]" } } */
+
+#include 
+#include 
+
+bool is_zero(__m128i x)
+{
+  return _mm_movemask_epi8(_mm_cmpeq_epi8(x, _mm_setzero_si128())) == 0xffff;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr104371-2.c 
b/gcc/testsuite/gcc.target/i386/pr104371-2.c
new file mode 100755
index 000..f0d0afd5897
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr104371-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-final { scan-assembler "vptest\[ \\t\]" } } */
+/* { dg-final { scan-assembler-not "vpxor\[ \\t\]" } } */
+/* { dg-final { scan-assembler-not "vpcmpeqb\[ \\t\]" } } */
+/* { dg-final { scan-assembler-not "vpmovmskb\[ \\t\]" } } */
+
+#include 
+#include 
+
+bool is_zero256(__m256i x)
+{
+  return _mm256_movemask_epi8(_mm256_cmpeq_epi8(x, _mm256_setzero_si256())) == 
0x;
+}
-- 
2.18.1



[PATCH] Reconstruct i386 testsuite with __builtin_cpu_supports

2022-05-06 Thread Haochen Jiang via Gcc-patches
Hi all,

Some check files in the i386 testsuite were written before the function 
__builtin_cpu_supports was introduced. All of them use __get_cpuid_count. 
This patch aims to reconstruct the i386 testsuite with __builtin_cpu_supports 
so that we have much clearer code.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

Also, while writing this patch, I found some files in the testsuite that might 
be useless currently. For example, the check in 
gcc/testsuite/gcc.target/i386/sse-os-support.h always returns 1. There 
are also some files that will no longer be included at all with this patch. 
Should we remove those files when we have time?

BRs,
Haochen

gcc/testsuite/ChangeLog:

* gcc.target/i386/adx-check.h: Change bit check to
__builtin_cpu_supports.
* gcc.target/i386/aes-avx-check.h: Ditto.
* gcc.target/i386/aes-check.h: Ditto.
* gcc.target/i386/avx-check.h: Ditto.
* gcc.target/i386/avx2-check.h: Ditto.
* gcc.target/i386/avx512-check.h: Ditto.
* gcc.target/i386/bmi-check.h: Ditto.
* gcc.target/i386/bmi2-check.h: Ditto.
* gcc.target/i386/f16c-check.h: Ditto.
* gcc.target/i386/fma-check.h: Ditto.
* gcc.target/i386/fma4-check.h: Ditto.
* gcc.target/i386/lzcnt-check.h: Ditto.
* gcc.target/i386/mmx-3dnow-check.h: Ditto.
* gcc.target/i386/mmx-check.h: Ditto.
* gcc.target/i386/pclmul-avx-check.h: Ditto.
* gcc.target/i386/pclmul-check.h: Ditto.
* gcc.target/i386/rtm-check.h: Ditto.
* gcc.target/i386/sha-check.h: Ditto.
* gcc.target/i386/sse-check.h: Ditto.
* gcc.target/i386/sse2-check.h: Ditto.
* gcc.target/i386/sse3-check.h: Ditto.
* gcc.target/i386/sse4_1-check.h: Ditto.
* gcc.target/i386/sse4_2-check.h: Ditto.
* gcc.target/i386/sse4a-check.h: Ditto.
* gcc.target/i386/ssse3-check.h: Ditto.
* gcc.target/i386/xop-check.h: Ditto.
---
 gcc/testsuite/gcc.target/i386/adx-check.h | 10 +---
 gcc/testsuite/gcc.target/i386/aes-avx-check.h | 14 +
 gcc/testsuite/gcc.target/i386/aes-check.h | 11 +---
 gcc/testsuite/gcc.target/i386/avx-check.h | 12 +---
 gcc/testsuite/gcc.target/i386/avx2-check.h| 20 +--
 gcc/testsuite/gcc.target/i386/avx512-check.h  | 59 +++
 gcc/testsuite/gcc.target/i386/bmi-check.h | 11 +---
 gcc/testsuite/gcc.target/i386/bmi2-check.h| 10 +---
 gcc/testsuite/gcc.target/i386/f16c-check.h| 10 +---
 gcc/testsuite/gcc.target/i386/fma-check.h | 11 +---
 gcc/testsuite/gcc.target/i386/fma4-check.h| 11 +---
 gcc/testsuite/gcc.target/i386/lzcnt-check.h   | 11 +---
 .../gcc.target/i386/mmx-3dnow-check.h | 11 +---
 gcc/testsuite/gcc.target/i386/mmx-check.h | 11 +---
 .../gcc.target/i386/pclmul-avx-check.h| 14 +
 gcc/testsuite/gcc.target/i386/pclmul-check.h  | 11 +---
 gcc/testsuite/gcc.target/i386/rtm-check.h | 10 +---
 gcc/testsuite/gcc.target/i386/sha-check.h | 10 +---
 gcc/testsuite/gcc.target/i386/sse-check.h | 11 +---
 gcc/testsuite/gcc.target/i386/sse2-check.h| 11 +---
 gcc/testsuite/gcc.target/i386/sse3-check.h| 11 +---
 gcc/testsuite/gcc.target/i386/sse4_1-check.h  | 11 +---
 gcc/testsuite/gcc.target/i386/sse4_2-check.h  | 11 +---
 gcc/testsuite/gcc.target/i386/sse4a-check.h   | 11 +---
 gcc/testsuite/gcc.target/i386/ssse3-check.h   | 11 +---
 gcc/testsuite/gcc.target/i386/xop-check.h | 11 +---
 26 files changed, 73 insertions(+), 272 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/adx-check.h 
b/gcc/testsuite/gcc.target/i386/adx-check.h
index cfed1a38483..bed5dcca385 100644
--- a/gcc/testsuite/gcc.target/i386/adx-check.h
+++ b/gcc/testsuite/gcc.target/i386/adx-check.h
@@ -1,5 +1,4 @@
 #include 
-#include "cpuid.h"
 
 static void adx_test (void);
 
@@ -11,13 +10,8 @@ static void __attribute__ ((noinline)) do_test (void)
 int
 main ()
 {
-  unsigned int eax, ebx, ecx, edx;
-
-  if (!__get_cpuid_count (7, 0, , , , ))
-return 0;
-
-  /* Run ADX test only if host has ADX support.  */
-  if (ebx & bit_ADX)
+  /* Check cpu support for ADX.  */
+  if (__builtin_cpu_supports ("adx"))
 {
   do_test ();
 #ifdef DEBUG
diff --git a/gcc/testsuite/gcc.target/i386/aes-avx-check.h 
b/gcc/testsuite/gcc.target/i386/aes-avx-check.h
index f2a4ead4014..74bf597ead4 100644
--- a/gcc/testsuite/gcc.target/i386/aes-avx-check.h
+++ b/gcc/testsuite/gcc.target/i386/aes-avx-check.h
@@ -2,8 +2,6 @@
 #include 
 #endif
 #include 
-#include "cpuid.h"
-#include "avx-os-support.h"
 
 static void aes_avx_test (void);
 
@@ -17,15 +15,9 @@ do_test (void)
 int
 main ()
 {
-  unsigned int eax, ebx, ecx, edx;
- 
-  if (!__get_cpuid (1, , , , ))
-return 0;
-
-  /* Run AES + AVX test only if host has AES + AVX support.  */
-  if (((ecx & (bit_AVX | bit_OSXSAVE | bit_AES))
-   == (bit_AVX | bit_OSXSAVE | bit_AES))
-  && avx_os_support ())
+  /* Check cpu support for AES and AVX.  */

[PATCH] [i386] Optimize a ^ ((a ^ b) & mask) to (~mask & a) | (b & mask).

2022-01-12 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch targets PR94790 and changes the instruction selection in the 
following circumstance.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

>From the perspective of the pipeline, `andn + and + ior` version take
2 cycles (AND and ANDN do not depend on each other), but xor + and + xor
will take 3 cycles.

-   xorl%edi, %esi
andl%edx, %esi
-   movl%esi, %eax
-   xorl%edi, %eax
+   andn%edi, %edx, %eax
+   orl %esi, %eax

gcc/ChangeLog:

PR target/94790
* config/i386/i386.md (*xor2andn): New define_insn_and_split.

gcc/testsuite/ChangeLog:

PR target/94790
* gcc.target/i386/pr94790-1.c: New test.
* gcc.target/i386/pr94790-2.c: Ditto.
---
 gcc/config/i386/i386.md   | 39 +++
 gcc/testsuite/gcc.target/i386/pr94790-1.c | 14 
 gcc/testsuite/gcc.target/i386/pr94790-2.c |  9 ++
 3 files changed, 62 insertions(+)
 create mode 100755 gcc/testsuite/gcc.target/i386/pr94790-1.c
 create mode 100755 gcc/testsuite/gcc.target/i386/pr94790-2.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9b424a3935b..38efc6d5837 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -10452,6 +10452,45 @@
(set_attr "znver1_decode" "double")
(set_attr "mode" "DI")])
 
+;; PR target/94790: Optimize a ^ ((a ^ b) & mask) to (~mask & a) | (b & mask)
+(define_insn_and_split "*xor2andn"
+  [(set (match_operand:SWI248 0 "nonimmediate_operand")
+   (xor:SWI248
+ (and:SWI248
+   (xor:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand")
+ (match_operand:SWI248 2 "nonimmediate_operand"))
+   (match_operand:SWI248 3 "nonimmediate_operand"))
+ (match_dup 1)))
+(clobber (reg:CC FLAGS_REG))]
+  "(TARGET_BMI || TARGET_AVX512BW)
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 4)
+   (and:SWI248
+ (not:SWI248
+   (match_dup 3))
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 5)
+   (and:SWI248
+ (match_dup 2)
+ (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 0)
+   (ior:SWI248
+ (match_dup 4)
+ (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])]
+  {
+operands[1] = force_reg (mode, operands[1]);
+operands[3] = force_reg (mode, operands[3]);
+operands[4] = gen_reg_rtx (mode);
+operands[5] = gen_reg_rtx (mode);
+  }
+)
+
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 (define_insn "*si_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/i386/pr94790-1.c 
b/gcc/testsuite/gcc.target/i386/pr94790-1.c
new file mode 100755
index 000..6ebbec15cfd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr94790-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi" } */
+/* { dg-final { scan-assembler-times "andn\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-not "xorl\[ \\t\]" } } */
+
+unsigned r1(unsigned a, unsigned b, unsigned mask)
+{
+  return a ^ ((a ^ b) & mask);
+}
+
+unsigned r2(unsigned a, unsigned b, unsigned mask)
+{
+  return (~mask & a) | (b & mask);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr94790-2.c 
b/gcc/testsuite/gcc.target/i386/pr94790-2.c
new file mode 100755
index 000..d7b0eec5bef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr94790-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi" } */
+/* { dg-final { scan-assembler-not "andn\[ \\t\]" } } */
+/* { dg-final { scan-assembler-times "xorl\[ \\t\]" 2 } } */
+
+unsigned r1(unsigned a, unsigned b, unsigned mask)
+{
+  return a ^ ((a ^ b) & mask) + (a ^ b);
+}
-- 
2.18.1



[PATCH] [i386] Remove register restriction on operands for andnot insn

2022-01-09 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch removes the register restriction on operands for andnot insn so that 
it can be used from memory.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

PR target/53652
* config/i386/sse.md (*andnot3): Remove register restriction.

gcc/testsuite/ChangeLog:

PR target/53652
* gcc.target/i386/pr53652-1.c: New test.
---
 gcc/config/i386/sse.md|  2 +-
 gcc/testsuite/gcc.target/i386/pr53652-1.c | 16 
 2 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr53652-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0997d9edf9d..4448b875d35 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16630,7 +16630,7 @@
 (define_insn "*andnot3"
   [(set (match_operand:VI 0 "register_operand" "=x,x,v")
(and:VI
- (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
+ (not:VI (match_operand:VI 1 "vector_operand" "0,x,v"))
  (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
   "TARGET_SSE"
 {
diff --git a/gcc/testsuite/gcc.target/i386/pr53652-1.c 
b/gcc/testsuite/gcc.target/i386/pr53652-1.c
new file mode 100644
index 000..bd07ee29f4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr53652-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-final { scan-assembler-times "pandn\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-not "vpternlogq\[ \\t\]" } } */
+
+typedef unsigned long long vec __attribute__((vector_size (16)));
+vec g;
+vec f1 (vec a, vec b)
+{
+  return ~a
+}
+vec f2 (vec a, vec b)
+{
+  return ~g
+}
+
-- 
2.18.1



[PATCH] [i386]Fix tdpbf16ps testcase

2021-12-24 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch fixes the testcase amxbf16-dpbf16ps-2.c. Previously the type 
conversion had some issues.

Ok for trunk?

BRs,
Haochen

gcc/testsuite/ChangeLog:

* gcc.target/i386/amx-check.h (check_float_tile_register):
New check function for float to prevent precision loss.
* gcc.target/i386/amxbf16-dpbf16ps-2.c: Correct the type convert
and byte offset. Use the new check function.
---
 gcc/testsuite/gcc.target/i386/amx-check.h | 23 --
 .../gcc.target/i386/amxbf16-dpbf16ps-2.c  | 30 ---
 2 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h 
b/gcc/testsuite/gcc.target/i386/amx-check.h
index 03616ff0b8e..434b0e59703 100644
--- a/gcc/testsuite/gcc.target/i386/amx-check.h
+++ b/gcc/testsuite/gcc.target/i386/amx-check.h
@@ -139,8 +139,27 @@ int check_tile_register (__tile* ref, __tile* target)
 
   for (i = 0; i < rows; i++)
 for (j = 0; j < colsb; j++)
-   if (ref->buf[i * colsb + j] != target->buf[i * colsb + j])
-   return 0;
+  if (ref->buf[i * colsb + j] != target->buf[i * colsb + j])
+   return 0;
+
+  return 1;
+}
+
+/* Compare float tile register value with __tile variable */
+int check_float_tile_register (__tile* ref, __tile* target)
+{
+  /* Tile register should be stored from tmm to
+ memory and compare with emulation results. */
+  int rows = target->rows;
+  int colsb = target->colsb / 4;
+  int i, j;
+  uint32_t *ref_buf = (uint32_t *) ref->buf;
+  uint32_t *target_buf = (uint32_t *) target->buf;
+
+  for (i = 0; i < rows; i++)
+for (j = 0; j < colsb; j++)
+  if (abs(ref_buf[i * colsb + j] - target_buf[i * colsb + j]) > 1)
+   return 0;
 
   return 1;
 }
diff --git a/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c 
b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c
index f7002ca5ea5..b00bc13ec78 100644
--- a/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c
+++ b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c
@@ -12,15 +12,25 @@ void test_amx_bf16_dpbf16ps ();
 /* Transformation functions between bf16/float */
 static uint16_t make_bf16 (float f)
 {
-  uint32_t u = (uint32_t)f;
-  u = (u >> 16) & 0x;
-  return (uint16_t)u;
+  union
+  {
+float f;
+uint32_t u;
+  } fu;
+  fu.f = f;
+  fu.u = (fu.u >> 16) & 0x;
+  return (uint16_t) fu.u;
 }
 
 static float make_f32 (uint16_t bf)
 {
-  uint32_t u = (uint32_t)(bf << 16);
-  return (float)u;
+  union
+  {
+float f;
+uint32_t u;
+  } fu;
+  fu.u = (uint32_t) bf << 16;
+  return fu.f;
 }
 
 /* Init tile buffer with bf16 pairs */
@@ -54,10 +64,10 @@ void calc_matrix_dpbf16ps (__tile *dst, __tile *src1, 
__tile *src2)
for (t = 0; t < 2; t+=2)
  {
dst_buf[i * N + k] += 
- (make_f32(src1_buf[i * 4 * N + 4 * j + t]) *
- make_f32(src2_buf[j * 4 * K + 4 * k + t])) +
- (make_f32(src1_buf[i * 4 * N + 4 * j + t + 1]) *
- make_f32(src2_buf[j * 4 * K + 4 * k + t + 1]));
+ (make_f32(src1_buf[i * 2 * N + 2 * j + t]) *
+ make_f32(src2_buf[j * 2 * K + 2 * k + t])) +
+ (make_f32(src1_buf[i * 2 * N + 2 * j + t + 1]) *
+ make_f32(src2_buf[j * 2 * K + 2 * k + t + 1]));
  }
 
 }
@@ -80,6 +90,6 @@ void test_amx_bf16_dpbf16ps ()
   _tile_dpbf16ps (1, 2, 3);
   _tile_stored (1, dst_ref.buf, _STRIDE);
 
-  if (!check_tile_register (_ref, ))
+  if (!check_float_tile_register (_ref, ))
 abort();
 }
-- 
2.18.1



[PATCH] [i386]Add missing BMI function to align with clang

2021-12-20 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch adds the missing BMI functions _tzcnt_u16, _andn_u32 and _andn_u64 
to align with clang.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* config/i386/bmiintrin.h (_tzcnt_u16): New define function.
(_andn_u32): Ditto.
(_andn_u64): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/bmi-1.c: Add new test for new define function.
* gcc.target/i386/bmi-2.c: Ditto.
* gcc.target/i386/bmi-3.c: Ditto.
---
 gcc/config/i386/bmiintrin.h   | 18 ++
 gcc/testsuite/gcc.target/i386/bmi-1.c |  8 +++-
 gcc/testsuite/gcc.target/i386/bmi-2.c |  8 +++-
 gcc/testsuite/gcc.target/i386/bmi-3.c |  8 +++-
 4 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
index 439d81cba11..92450a644eb 100644
--- a/gcc/config/i386/bmiintrin.h
+++ b/gcc/config/i386/bmiintrin.h
@@ -40,12 +40,24 @@ __tzcnt_u16 (unsigned short __X)
   return __builtin_ia32_tzcnt_u16 (__X);
 }
 
+extern __inline unsigned short __attribute__((__gnu_inline__, 
__always_inline__, __artificial__))
+_tzcnt_u16 (unsigned short __X)
+{
+  return __builtin_ia32_tzcnt_u16 (__X);
+}
+
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 __andn_u32 (unsigned int __X, unsigned int __Y)
 {
   return ~__X & __Y;
 }
 
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
+_andn_u32 (unsigned int __X, unsigned int __Y)
+{
+  return __andn_u32 (__X, __Y);
+}
+
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 __bextr_u32 (unsigned int __X, unsigned int __Y)
 {
@@ -114,6 +126,12 @@ __andn_u64 (unsigned long long __X, unsigned long long __Y)
   return ~__X & __Y;
 }
 
+extern __inline unsigned long long __attribute__((__gnu_inline__, 
__always_inline__, __artificial__))
+_andn_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  return __andn_u64 (__X, __Y);
+}
+
 extern __inline unsigned long long __attribute__((__gnu_inline__, 
__always_inline__, __artificial__))
 __bextr_u64 (unsigned long long __X, unsigned long long __Y)
 {
diff --git a/gcc/testsuite/gcc.target/i386/bmi-1.c 
b/gcc/testsuite/gcc.target/i386/bmi-1.c
index 738705e29d8..141adaac016 100644
--- a/gcc/testsuite/gcc.target/i386/bmi-1.c
+++ b/gcc/testsuite/gcc.target/i386/bmi-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fno-ipa-icf -mbmi " } */
-/* { dg-final { scan-assembler "andn\[^\\n]*eax" } } */
+/* { dg-final { scan-assembler-times "andn\[^\\n]*eax" 2 } } */
 /* { dg-final { scan-assembler-times "bextr\[ \\t]+\[^\\n]*eax" 2 } } */
 /* { dg-final { scan-assembler-times "blsi\[^\\n]*eax" 2 } } */
 /* { dg-final { scan-assembler-times "blsmsk\[^\\n]*eax" 2 } } */
@@ -15,6 +15,12 @@ func_andn32 (unsigned int X, unsigned int Y)
   return __andn_u32(X, Y);
 }
 
+unsigned int
+func_andn32_2 (unsigned int X, unsigned int Y)
+{
+  return _andn_u32(X, Y);
+}
+
 unsigned int
 func_bextr32 (unsigned int X, unsigned int Y)
 {
diff --git a/gcc/testsuite/gcc.target/i386/bmi-2.c 
b/gcc/testsuite/gcc.target/i386/bmi-2.c
index 6b8595eb9e1..3f9052a4991 100644
--- a/gcc/testsuite/gcc.target/i386/bmi-2.c
+++ b/gcc/testsuite/gcc.target/i386/bmi-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile { target { ! ia32  } } } */
 /* { dg-options "-O2 -fno-ipa-icf -mbmi " } */
-/* { dg-final { scan-assembler "andn\[^\\n]*rax" } } */
+/* { dg-final { scan-assembler-times "andn\[^\\n]*rax" 2 } } */
 /* { dg-final { scan-assembler-times "bextr\[ \\t]+\[^\\n]*rax" 2 } } */
 /* { dg-final { scan-assembler-times "blsi\[^\\n]*rax" 2 } } */
 /* { dg-final { scan-assembler-times "blsmsk\[^\\n]*rax" 2 } } */
@@ -15,6 +15,12 @@ func_andn64 (unsigned long long X, unsigned long long Y)
   return __andn_u64 (X, Y);
 }
 
+unsigned long long
+func_andn64_2 (unsigned long long X, unsigned long long Y)
+{
+  return _andn_u64 (X, Y);
+}
+
 unsigned long long
 func_bextr64 (unsigned long long X, unsigned long long Y)
 {
diff --git a/gcc/testsuite/gcc.target/i386/bmi-3.c 
b/gcc/testsuite/gcc.target/i386/bmi-3.c
index ddc5e0f66e2..0b91bc25bf8 100644
--- a/gcc/testsuite/gcc.target/i386/bmi-3.c
+++ b/gcc/testsuite/gcc.target/i386/bmi-3.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mbmi " } */
-/* { dg-final { scan-assembler "tzcntw\[^\\n]*(%|)ax" } } */
+/* { dg-final { scan-assembler-times "tzcntw\[^\\n]*%?ax" 2 } } */
 
 #include 
 
@@ -9,3 +9,9 @@ func_tzcnt16 (unsigned short X)
 {
   return __tzcnt_u16(X);
 }
+
+unsigned short
+func_tzcnt16_2 (unsigned short X)
+{
+  return _tzcnt_u16(X);
+}
-- 
2.18.1



[PATCH] [i386][avx512]Add combine splitter to transform vpternlogd/vpcmpeqd/vpxor/vblendvps to vblendvps for ~op0

2021-12-14 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch fixes the regression previously reported for the combine splitter 
under the '-m32 -march=cascadelake' options.

Regtested on x86_64-pc-linux-gnu.

BRs,
Haochen

gcc/ChangeLog:

PR target/100738
* config/i386/sse.md (*avx_cmp3_lt, *avx_cmp3_ltint):
Remove MEM_P restriction and add force_reg for operands[2].
(*avx_cmp3_ltint_not): Add new define_insn_and_split.

gcc/testsuite/ChangeLog:

PR target/100738
* g++.target/i386/avx512vl-pr100738-1.C: New test.
---
 gcc/config/i386/sse.md| 44 +--
 .../g++.target/i386/avx512vl-pr100738-1.C |  8 
 2 files changed, 48 insertions(+), 4 deletions(-)
 create mode 100755 gcc/testsuite/g++.target/i386/avx512vl-pr100738-1.C

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5421fb51684..8ec9fb075d0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3528,8 +3528,7 @@
 UNSPEC_PCMP)))]
   "TARGET_AVX512VL && ix86_pre_reload_split ()
   /* LT or GE 0 */
-  && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
-  || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
+  && ((INTVAL (operands[5]) == 1) || (INTVAL (operands[5]) == 5))"
   "#"
   "&& 1"
   [(set (match_dup 0)
@@ -3543,6 +3542,7 @@
 {
   if (INTVAL (operands[5]) == 5)
 std::swap (operands[1], operands[2]);
+  operands[2] = force_reg (mode, operands[2]);
 })
 
 (define_insn_and_split "*avx_cmp3_ltint"
@@ -3557,8 +3557,7 @@
 UNSPEC_PCMP)))]
   "TARGET_AVX512VL && ix86_pre_reload_split ()
   /* LT or GE 0 */
-  && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
-  || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
+  && ((INTVAL (operands[5]) == 1) || (INTVAL (operands[5]) == 5))"
   "#"
   "&& 1"
   [(set (match_dup 0)
@@ -3575,7 +3574,44 @@
 std::swap (operands[1], operands[2]);
   operands[0] = gen_lowpart (mode, operands[0]);
   operands[1] = gen_lowpart (mode, operands[1]);
+  operands[2] = force_reg (mode,
+ gen_lowpart (mode, operands[2]));
+})
+
+(define_insn_and_split "*avx_cmp3_ltint_not"
+ [(set (match_operand:VI48_AVX  0 "register_operand")
+   (vec_merge:VI48_AVX
+(match_operand:VI48_AVX 1 "vector_operand")
+(match_operand:VI48_AVX 2 "vector_operand")
+(unspec:
+  [(subreg:VI48_AVX
+   (not:
+ (match_operand: 3 "vector_operand")) 0)
+   (match_operand:VI48_AVX 4 "const0_operand")
+   (match_operand:SI 5 "const_0_to_7_operand")]
+   UNSPEC_PCMP)))]
+  "TARGET_AVX512VL && ix86_pre_reload_split ()
+  /* not LT or GE 0 */
+  && ((INTVAL (operands[5]) == 1) || (INTVAL (operands[5]) == 5))"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:
+ [(match_dup 1)
+  (match_dup 2)
+  (subreg:
+(lt:VI48_AVX
+ (match_dup 3)
+ (match_dup 4)) 0)]
+   UNSPEC_BLENDV))]
+{
+  if (INTVAL (operands[5]) == 5)
+std::swap (operands[1], operands[2]);
+  operands[0] = gen_lowpart (mode, operands[0]);
+  operands[1] = force_reg (mode,
+ gen_lowpart (mode, operands[1]));
   operands[2] = gen_lowpart (mode, operands[2]);
+  operands[3] = lowpart_subreg (mode, operands[3], mode);
 })
 
 (define_insn "avx_vmcmp3"
diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr100738-1.C 
b/gcc/testsuite/g++.target/i386/avx512vl-pr100738-1.C
new file mode 100755
index 000..ac4d62b94d1
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512vl-pr100738-1.C
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=cascadelake" } */
+/* { dg-final {scan-assembler-times "vblendvps\[ \\t\]" 2 } } */
+/* { dg-final {scan-assembler-not "vpcmpeqd\[ \\t\]" } } */
+/* { dg-final {scan-assembler-not "vpxor\[ \\t\]" } } */
+/* { dg-final {scan-assembler-not "vpternlogd\[ \\t\]" } } */
+
+#include "pr100738-1.C"
-- 
2.18.1



[PATCH] [i386]Add combine splitter to transform vashr/vlshr/vashl_optab to ashr/lshr/ashl_optab for const vector duplicate operand.

2021-12-07 Thread Haochen Jiang via Gcc-patches
Hi,

This patch adds a combine splitter to transform vashr/vlshr/vashl_optab to 
ashr/lshr/ashl_optab for a const vector duplicate operand.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

PR target/101796
* config/i386/predicates.md (const_vector_duplicate_operand):
Add new predicate.
* config/i386/sse.md(3):
Add new define_split below.

gcc/testsuite/ChangeLog:

PR target/101796
* gcc.target/i386/pr101796-1.c: New test.
---
 gcc/config/i386/predicates.md  | 13 +
 gcc/config/i386/sse.md | 14 ++
 gcc/testsuite/gcc.target/i386/pr101796-1.c | 20 
 3 files changed, 47 insertions(+)
 create mode 100755 gcc/testsuite/gcc.target/i386/pr101796-1.c

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 4ccbe11b842..770e2f0c0dd 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1844,6 +1844,19 @@
   return true;
 })
 
+;; Return true if OP is a const vector with duplicate value.
+(define_predicate "const_vector_duplicate_operand"
+  (match_code "const_vector")
+{
+  rtx elt = XVECEXP (op, 0, 0);
+  int i, nelt = XVECLEN (op, 0);
+
+  for (i = 1; i < nelt; ++i)
+if (!rtx_equal_p (elt, XVECEXP (op, 0, i)))
+  return false;
+  return true;
+})
+
 ;; Return true if OP is a parallel for a vbroadcast permute.
 (define_predicate "avx_vbroadcast_operand"
   (and (match_code "parallel")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 08bdcddc111..a2c0c1209c7 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15232,6 +15232,20 @@
(const_string "0")))
(set_attr "mode" "")])
 
+;; PR target/101796: Transform movl+vpbroadcastw+vpsravw to vpsraw
+;; when COUNT is immediate.
+(define_split
+  [(set (match_operand:VI248_AVX512BW 0 "register_operand")
+   (any_shift:VI248_AVX512BW
+ (match_operand:VI248_AVX512BW 1 "nonimmediate_operand")
+ (match_operand:VI248_AVX512BW 2 "const_vector_duplicate_operand")))]
+  "TARGET_AVX512F && GET_MODE_UNIT_BITSIZE (mode)
+   > INTVAL (XVECEXP (operands[2], 0, 0))"
+  [(set (match_dup 0)
+   (any_shift:VI248_AVX512BW
+ (match_dup 1)
+ (match_dup 3)))]
+  "operands[3] = XVECEXP (operands[2], 0, 0);")
 
 (define_expand "vec_shl_"
   [(set (match_dup 3)
diff --git a/gcc/testsuite/gcc.target/i386/pr101796-1.c 
b/gcc/testsuite/gcc.target/i386/pr101796-1.c
new file mode 100755
index 000..32ae5909913
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101796-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-final {scan-assembler-times "vpsrlw\[ \\t\]" 1 } } */
+/* { dg-final {scan-assembler-times "vpsllw\[ \\t\]" 1 } } */
+/* { dg-final {scan-assembler-times "vpsraw\[ \\t\]" 1 } } */
+/* { dg-final {scan-assembler-not "vpbroadcastw\[ \\t\]" } } */
+/* { dg-final {scan-assembler-not "vpsrlvw\[ \\t\]" } } */
+/* { dg-final {scan-assembler-not "vpsllvw\[ \\t\]" } } */
+/* { dg-final {scan-assembler-not "vpsravw\[ \\t\]" } } */
+#include 
+
+volatile __m512i a, b;
+
+void
+foo()
+{
+  b = _mm512_srlv_epi16 (a, _mm512_set1_epi16 (3));
+  b = _mm512_sllv_epi16 (a, _mm512_set1_epi16 (4));
+  b = _mm512_srav_epi16 (a, _mm512_set1_epi16 (5));
+}
-- 
2.18.1



[PATCH] [i386]Add combine splitter to transform vpcmpeqd/vpxor/vblendvps to vblendvps for ~op0

2021-12-06 Thread Haochen Jiang via Gcc-patches
This patch adds a combine splitter to transform vpcmpeqd/vpxor/vblendvps to 
vblendvps for ~op0.

OK for trunk?

BRs,
Haochen

gcc/ChangeLog:

PR target/100738
* config/i386/sse.md 
(*_blendv_not_ltint):
Add new define_insn_and_split.

gcc/testsuite/ChangeLog:

PR target/100738
* g++.target/i386/pr100738-1.C: New test.

---
 gcc/config/i386/sse.md | 28 ++
 gcc/testsuite/g++.target/i386/pr100738-1.C | 19 +++
 2 files changed, 47 insertions(+)
 create mode 100755 gcc/testsuite/g++.target/i386/pr100738-1.C

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 08bdcddc111..db3506c78d7 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20659,6 +20659,34 @@
(set_attr "btver2_decode" "vector,vector,vector") 
(set_attr "mode" "")])
 
+;; PR target/100738: Transform vpcmpeqd + vpxor + vblendvps to vblendvps for 
inverted mask;
+(define_insn_and_split 
"*_blendv_not_ltint"
+  [(set (match_operand: 0 "register_operand")
+   (unspec:
+ [(match_operand: 1 "register_operand")
+  (match_operand: 2 "vector_operand")
+  (subreg:
+(lt:VI48_AVX
+  (subreg:VI48_AVX
+  (not:
+(match_operand: 3 "register_operand")) 0)
+  (match_operand:VI48_AVX 4 "const0_operand")) 0)]
+ UNSPEC_BLENDV))]
+  "TARGET_SSE4_1 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:
+[(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))]
+{
+  operands[0] = gen_lowpart (mode, operands[0]);
+  operands[1] = gen_lowpart (mode, operands[1]);
+  operands[2] = gen_lowpart (mode, operands[2]);
+  operands[3] = gen_lowpart (mode, operands[3]);
+  if (MEM_P (operands[2]))
+operands[2] = force_reg (mode, operands[2]);
+})
+
 (define_insn "_dp"
   [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
(unspec:VF_128_256
diff --git a/gcc/testsuite/g++.target/i386/pr100738-1.C 
b/gcc/testsuite/g++.target/i386/pr100738-1.C
new file mode 100755
index 000..5a04c5b031f
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr100738-1.C
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx2" } */
+/* { dg-final {scan-assembler-times "vblendvps\[ \\t\]" 2 } } */
+/* { dg-final {scan-assembler-not "vpcmpeqd\[ \\t\]" } } */
+/* { dg-final {scan-assembler-not "vpxor\[ \\t\]" } } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef char v16qi __attribute__((vector_size(16)));
+v4si
+foo_1 (v16qi a, v4si b, v4si c, v4si d)
+{
+  return ((v4si)~a) < 0 ? c : d;
+}
+
+v4si
+foo_2 (v16qi a, v4si b, v4si c, v4si d)
+{
+  return ((v4si)~a) >= 0 ? c : d;
+}
-- 
2.18.1