RE: [PATCH] AVX512FP16: Support cond_op for HFmode

Liu, Hongtao via Gcc-patches Thu, 23 Sep 2021 02:31:02 -0700

>-----Original Message-----
>From: Wang, Hongyu <hongyu.w...@intel.com>
>Sent: Thursday, September 23, 2021 5:16 PM
>To: Liu, Hongtao <hongtao....@intel.com>
>Cc: gcc-patches@gcc.gnu.org
>Subject: [PATCH] AVX512FP16: Support cond_op for HFmode
>
>Hi,
>
>This patch extends the expanders for cond_op to support vector HF modes.
>Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Do the runtime tests pass on sde{-m32,}?
>Ok for master?
>
>gcc/ChangeLog:
>
>       * config/i386/sse.md (cond_<insn><mode>): Extend to support
>       vector HFmodes.
>       (cond_mul<mode>): Likewise.
>       (cond_div<mode>): Likewise.
>       (cond_<code><mode>): Likewise.
>       (cond_fma<mode>): Likewise.
>       (cond_fms<mode>): Likewise.
>       (cond_fnma<mode>): Likewise.
>       (cond_fnms<mode>): Likewise.
>
>gcc/testsuite/ChangeLog:
>
>       * gcc.target/i386/cond_op_addsubmuldiv__Float16-1.c: New test.
>       * gcc.target/i386/cond_op_addsubmuldiv__Float16-2.c: Ditto.
>       * gcc.target/i386/cond_op_fma__Float16-1.c: Ditto.
>       * gcc.target/i386/cond_op_fma__Float16-2.c: Ditto.
>       * gcc.target/i386/cond_op_maxmin__Float16-1.c: Ditto.
>       * gcc.target/i386/cond_op_maxmin__Float16-2.c: Ditto.
>---
> gcc/config/i386/sse.md                        | 112 +++++++++---------
> .../i386/cond_op_addsubmuldiv__Float16-1.c    |   9 ++
> .../i386/cond_op_addsubmuldiv__Float16-2.c    |   7 ++
> .../gcc.target/i386/cond_op_fma__Float16-1.c  |  20 ++++
> .../gcc.target/i386/cond_op_fma__Float16-2.c  |   7 ++
> .../i386/cond_op_maxmin__Float16-1.c          |   8 ++
> .../i386/cond_op_maxmin__Float16-2.c          |   6 +
> 7 files changed, 113 insertions(+), 56 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-2.c
>
>diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index
>1ca95984afc..c2eeb7b1517 100644
>--- a/gcc/config/i386/sse.md
>+++ b/gcc/config/i386/sse.md
>@@ -2118,12 +2118,12 @@
>   [(set_attr "isa" "noavx,noavx,avx,avx")])
>
> (define_expand "cond_<insn><mode>"
>-  [(set (match_operand:VF 0 "register_operand")
>-      (vec_merge:VF
>-        (plusminus:VF
>-          (match_operand:VF 2 "vector_operand")
>-          (match_operand:VF 3 "vector_operand"))
>-        (match_operand:VF 4 "nonimm_or_0_operand")
>+  [(set (match_operand:VFH 0 "register_operand")
>+      (vec_merge:VFH
>+        (plusminus:VFH
>+          (match_operand:VFH 2 "vector_operand")
>+          (match_operand:VFH 3 "vector_operand"))
>+        (match_operand:VFH 4 "nonimm_or_0_operand")
>         (match_operand:<avx512fmaskmode> 1 "register_operand")))]
>   "<MODE_SIZE> == 64 || TARGET_AVX512VL"
> {
>@@ -2207,12 +2207,12 @@
>    (set_attr "mode" "<ssescalarmode>")])
>
> (define_expand "cond_mul<mode>"
>-  [(set (match_operand:VF 0 "register_operand")
>-      (vec_merge:VF
>-        (mult:VF
>-          (match_operand:VF 2 "vector_operand")
>-          (match_operand:VF 3 "vector_operand"))
>-        (match_operand:VF 4 "nonimm_or_0_operand")
>+  [(set (match_operand:VFH 0 "register_operand")
>+      (vec_merge:VFH
>+        (mult:VFH
>+          (match_operand:VFH 2 "vector_operand")
>+          (match_operand:VFH 3 "vector_operand"))
>+        (match_operand:VFH 4 "nonimm_or_0_operand")
>         (match_operand:<avx512fmaskmode> 1 "register_operand")))]
>   "<MODE_SIZE> == 64 || TARGET_AVX512VL"
> {
>@@ -2322,12 +2322,12 @@
> })
>
> (define_expand "cond_div<mode>"
>-  [(set (match_operand:VF 0 "register_operand")
>-      (vec_merge:VF
>-        (div:VF
>-          (match_operand:VF 2 "register_operand")
>-          (match_operand:VF 3 "vector_operand"))
>-        (match_operand:VF 4 "nonimm_or_0_operand")
>+  [(set (match_operand:VFH 0 "register_operand")
>+      (vec_merge:VFH
>+        (div:VFH
>+          (match_operand:VFH 2 "register_operand")
>+          (match_operand:VFH 3 "vector_operand"))
>+        (match_operand:VFH 4 "nonimm_or_0_operand")
>         (match_operand:<avx512fmaskmode> 1 "register_operand")))]
>   "<MODE_SIZE> == 64 || TARGET_AVX512VL"
> {
>@@ -2660,12 +2660,12 @@
>    (set_attr "mode" "HF")])
>
> (define_expand "cond_<code><mode>"
>-  [(set (match_operand:VF 0 "register_operand")
>-      (vec_merge:VF
>-        (smaxmin:VF
>-          (match_operand:VF 2 "vector_operand")
>-          (match_operand:VF 3 "vector_operand"))
>-        (match_operand:VF 4 "nonimm_or_0_operand")
>+  [(set (match_operand:VFH 0 "register_operand")
>+      (vec_merge:VFH
>+        (smaxmin:VFH
>+          (match_operand:VFH 2 "vector_operand")
>+          (match_operand:VFH 3 "vector_operand"))
>+        (match_operand:VFH 4 "nonimm_or_0_operand")
>         (match_operand:<avx512fmaskmode> 1 "register_operand")))]
>   "<MODE_SIZE> == 64 || TARGET_AVX512VL"
> {
>@@ -4785,13 +4785,13 @@
>    (set_attr "mode" "<MODE>")])
>
> (define_expand "cond_fma<mode>"
>-  [(set (match_operand:VF_AVX512VL 0 "register_operand")
>-      (vec_merge:VF_AVX512VL
>-        (fma:VF_AVX512VL
>-          (match_operand:VF_AVX512VL 2 "vector_operand")
>-          (match_operand:VF_AVX512VL 3 "vector_operand")
>-          (match_operand:VF_AVX512VL 4 "vector_operand"))
>-        (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand")
>+  [(set (match_operand:VFH_AVX512VL 0 "register_operand")
>+      (vec_merge:VFH_AVX512VL
>+        (fma:VFH_AVX512VL
>+          (match_operand:VFH_AVX512VL 2 "vector_operand")
>+          (match_operand:VFH_AVX512VL 3 "vector_operand")
>+          (match_operand:VFH_AVX512VL 4 "vector_operand"))
>+        (match_operand:VFH_AVX512VL 5 "nonimm_or_0_operand")
>         (match_operand:<avx512fmaskmode> 1 "register_operand")))]
>   "TARGET_AVX512F"
> {
>@@ -4885,14 +4885,14 @@
>    (set_attr "mode" "<MODE>")])
>
> (define_expand "cond_fms<mode>"
>-  [(set (match_operand:VF_AVX512VL 0 "register_operand")
>-      (vec_merge:VF_AVX512VL
>-        (fma:VF_AVX512VL
>-          (match_operand:VF_AVX512VL 2 "vector_operand")
>-          (match_operand:VF_AVX512VL 3 "vector_operand")
>-          (neg:VF_AVX512VL
>-            (match_operand:VF_AVX512VL 4 "vector_operand")))
>-        (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand")
>+  [(set (match_operand:VFH_AVX512VL 0 "register_operand")
>+      (vec_merge:VFH_AVX512VL
>+        (fma:VFH_AVX512VL
>+          (match_operand:VFH_AVX512VL 2 "vector_operand")
>+          (match_operand:VFH_AVX512VL 3 "vector_operand")
>+          (neg:VFH_AVX512VL
>+            (match_operand:VFH_AVX512VL 4 "vector_operand")))
>+        (match_operand:VFH_AVX512VL 5 "nonimm_or_0_operand")
>         (match_operand:<avx512fmaskmode> 1 "register_operand")))]
>   "TARGET_AVX512F"
> {
>@@ -4988,14 +4988,14 @@
>    (set_attr "mode" "<MODE>")])
>
> (define_expand "cond_fnma<mode>"
>-  [(set (match_operand:VF_AVX512VL 0 "register_operand")
>-      (vec_merge:VF_AVX512VL
>-        (fma:VF_AVX512VL
>-          (neg:VF_AVX512VL
>-            (match_operand:VF_AVX512VL 2 "vector_operand"))
>-          (match_operand:VF_AVX512VL 3 "vector_operand")
>-          (match_operand:VF_AVX512VL 4 "vector_operand"))
>-        (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand")
>+  [(set (match_operand:VFH_AVX512VL 0 "register_operand")
>+      (vec_merge:VFH_AVX512VL
>+        (fma:VFH_AVX512VL
>+          (neg:VFH_AVX512VL
>+            (match_operand:VFH_AVX512VL 2 "vector_operand"))
>+          (match_operand:VFH_AVX512VL 3 "vector_operand")
>+          (match_operand:VFH_AVX512VL 4 "vector_operand"))
>+        (match_operand:VFH_AVX512VL 5 "nonimm_or_0_operand")
>         (match_operand:<avx512fmaskmode> 1 "register_operand")))]
>   "TARGET_AVX512F"
> {
>@@ -5093,15 +5093,15 @@
>    (set_attr "mode" "<MODE>")])
>
> (define_expand "cond_fnms<mode>"
>-  [(set (match_operand:VF_AVX512VL 0 "register_operand")
>-      (vec_merge:VF_AVX512VL
>-        (fma:VF_AVX512VL
>-          (neg:VF_AVX512VL
>-            (match_operand:VF_AVX512VL 2 "vector_operand"))
>-          (match_operand:VF_AVX512VL 3 "vector_operand")
>-          (neg:VF_AVX512VL
>-            (match_operand:VF_AVX512VL 4 "vector_operand")))
>-        (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand")
>+  [(set (match_operand:VFH_AVX512VL 0 "register_operand")
>+      (vec_merge:VFH_AVX512VL
>+        (fma:VFH_AVX512VL
>+          (neg:VFH_AVX512VL
>+            (match_operand:VFH_AVX512VL 2 "vector_operand"))
>+          (match_operand:VFH_AVX512VL 3 "vector_operand")
>+          (neg:VFH_AVX512VL
>+            (match_operand:VFH_AVX512VL 4 "vector_operand")))
>+        (match_operand:VFH_AVX512VL 5 "nonimm_or_0_operand")
>         (match_operand:<avx512fmaskmode> 1 "register_operand")))]
>   "TARGET_AVX512F"
> {
>diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-1.c
>b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-1.c
>new file mode 100644
>index 00000000000..b503b75d548
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-1.c
>@@ -0,0 +1,9 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -march=sapphirerapids -DTYPE=_Float16
>+-fdump-tree-vect" } */
>+/* { dg-final { scan-tree-dump ".COND_ADD" "vect" } } */
>+/* { dg-final { scan-tree-dump ".COND_SUB" "vect" } } */
>+/* { dg-final { scan-tree-dump ".COND_MUL" "vect" } } */
>+/* { dg-final { scan-tree-dump ".COND_RDIV" "vect" } } */
>+
>+#include "cond_op_addsubmuldiv_double-1.c"
>+
>diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-2.c
>b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-2.c
>new file mode 100644
>index 00000000000..e8397bbc5b1
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-2.c
>@@ -0,0 +1,7 @@
>+/* { dg-do run } */
>+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=256
>+-DTYPE=_Float16" } */
>+/* { dg-require-effective-target avx512vl } */
>+/* { dg-require-effective-target avx512fp16 } */
>+
>+#define AVX512FP16
>+#include "cond_op_addsubmuldiv_double-2.c"
>diff --git a/gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-1.c
>b/gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-1.c
>new file mode 100644
>index 00000000000..9ea45d690e2
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-1.c
>@@ -0,0 +1,20 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -march=sapphirerapids -DTYPE=_Float16
>+-fdump-tree-optimized -D__BUILTIN_FMA=__builtin_fmaf16" } */
>+/* { dg-final { scan-tree-dump-times ".COND_FMA" 3 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times ".COND_FNMA" 3 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times ".COND_FMS" 3 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times ".COND_FNMS" 3 "optimized" } } */
>+/* { dg-final { scan-assembler-times "vfmadd132ph\[
>+\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } }
>+*/
>+/* { dg-final { scan-assembler-times "vfnmadd132ph\[
>+\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } }
>+*/
>+/* { dg-final { scan-assembler-times "vfmsub132ph\[
>+\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } }
>+*/
>+/* { dg-final { scan-assembler-times "vfnmsub132ph\[
>+\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"  1 } }
>+*/
>+/* { dg-final { scan-assembler-times "vfmadd231ph\[
>+\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
>+/* { dg-final { scan-assembler-times "vfnmadd231ph\[
>+\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
>+/* { dg-final { scan-assembler-times "vfmsub231ph\[
>+\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
>+/* { dg-final { scan-assembler-times "vfnmsub231ph\[
>+\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
>+/* { dg-final { scan-assembler-times "vfmadd132ph\[
>+\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
>+/* { dg-final { scan-assembler-times "vfnmadd132ph\[
>+\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
>+/* { dg-final { scan-assembler-times "vfmsub132ph\[
>+\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
>+/* { dg-final { scan-assembler-times "vfnmsub132ph\[
>+\\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
>+
>+#include "cond_op_fma_double-1.c"
>diff --git a/gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-2.c
>b/gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-2.c
>new file mode 100644
>index 00000000000..b7ee1cb8c95
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-2.c
>@@ -0,0 +1,7 @@
>+/* { dg-do run } */
>+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=256
>+-DTYPE=_Float16 -D__BUILTIN_FMA=__builtin_fmaf16" -DNUM=100 } */
>+/* { dg-require-effective-target avx512fp16 } */
>+/* { dg-require-effective-target avx512vl } */
>+
>+#define AVX512FP16
>+#include "cond_op_fma_double-2.c"
>diff --git a/gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-1.c
>b/gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-1.c
>new file mode 100644
>index 00000000000..b09410248f0
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-1.c
>@@ -0,0 +1,8 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -march=sapphirerapids -DTYPE=_Float16
>+-fdump-tree-optimized -DFN_MAX=__builtin_fmaxf16
>+-DFN_MIN=__builtin_fminf16" } */
>+/* { dg-final { scan-tree-dump ".COND_MAX" "optimized" } } */
>+/* { dg-final { scan-tree-dump ".COND_MIN" "optimized" } } */
>+/* { dg-final { scan-assembler-times "vmaxph"  1 } } */
>+/* { dg-final { scan-assembler-times "vminph"  1 } } */
>+
>+#include "cond_op_maxmin_double-1.c"
>diff --git a/gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-2.c
>b/gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-2.c
>new file mode 100644
>index 00000000000..b67adc8b2d3
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-2.c
>@@ -0,0 +1,6 @@
>+/* { dg-do run } */
>+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=256
>+-DTYPE=_Float16 -DFN_MAX=__builtin_fmaxf16 -
>DFN_MIN=__builtin_fminf16
>+-ffast-math" } */
>+/* { dg-require-effective-target avx512vl } */
>+/* { dg-require-effective-target avx512fp16 } */
>+
>+#include "cond_op_maxmin_double-2.c"
>--
>2.18.1
RE: [PATCH] AVX512FP16: Support cond_op for HFmode

Reply via email to