Uros Bizjak <[email protected]> 于2024年11月7日周四 15:22写道:
>
> On Thu, Nov 7, 2024 at 6:58 AM Hongyu Wang <[email protected]> wrote:
> >
> > Hi,
> >
> > We recently added support for cbranchbf4 with AVX10_2 native bf16 comi
> > instructions, so do the same for cstorebf4.
> >
> > Bootstrapped & regtested on x86_64-pc-linux-gnu.
> > Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> > * config/i386/i386.md (cstorebf4): Use vcomsbf16 under
> > TARGET_AVX10_2_256 and -fno-trapping-math.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/avx10_2-comibf-3.c: New test.
> > * gcc.target/i386/avx10_2-comibf-4.c: Likewise.
>
> OK.
>
> While there, can you please also fix formatting of new code in
> cbranchbf4? There is no need for curly braces and alignment is wrong.
>
Yes, the attached patch is what I'm pushing. Thanks.
From f848974e9237f04ecb486c9782bd0bb392974dc5 Mon Sep 17 00:00:00 2001
From: Hongyu Wang <[email protected]>
Date: Tue, 5 Nov 2024 17:19:34 +0800
Subject: [PATCH] i386: Support cstorebf4 with native bf16 comi
We recently added support for cbranchbf4 with AVX10_2 native bf16 comi
instructions, so do the same for cstorebf4.
gcc/ChangeLog:
* config/i386/i386.md (cstorebf4): Use vcomsbf16 under
TARGET_AVX10_2_256 and -fno-trapping-math.
(cbranchbf4): Adjust formatting.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx10_2-comibf-3.c: New test.
* gcc.target/i386/avx10_2-comibf-4.c: Likewise.
---
gcc/config/i386/i386.md | 24 ++++++-----
.../gcc.target/i386/avx10_2-comibf-3.c | 27 ++++++++++++
.../gcc.target/i386/avx10_2-comibf-4.c | 41 +++++++++++++++++++
3 files changed, 82 insertions(+), 10 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-comibf-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-comibf-4.c
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c492fe55881..34bc04622b1 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1818,10 +1818,8 @@ (define_expand "cbranchbf4"
"TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
{
if (TARGET_AVX10_2_256 && !flag_trapping_math)
- {
- ix86_expand_branch (GET_CODE (operands[0]),
- operands[1], operands[2], operands[3]);
- }
+ ix86_expand_branch (GET_CODE (operands[0]),
+ operands[1], operands[2], operands[3]);
else
{
rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[1]);
@@ -1860,12 +1858,18 @@ (define_expand "cstorebf4"
(const_int 0)]))]
"TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
{
- rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
- rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
- rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]),
- op1, op2, SFmode, 0, 1);
- if (!rtx_equal_p (res, operands[0]))
- emit_move_insn (operands[0], res);
+ if (TARGET_AVX10_2_256 && !flag_trapping_math)
+ ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ else
+ {
+ rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
+ rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
+ rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]),
+ op1, op2, SFmode, 0, 1);
+ if (!rtx_equal_p (res, operands[0]))
+ emit_move_insn (operands[0], res);
+ }
DONE;
})
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-comibf-3.c b/gcc/testsuite/gcc.target/i386/avx10_2-comibf-3.c
new file mode 100644
index 00000000000..afa41a3f071
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-comibf-3.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v3 -O2" } */
+
+/* { dg-final { scan-assembler-times "vcomsbf16\[ \\t\]+\[^{}\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6 } } */
+/* { dg-final { scan-assembler-times "set\[aeglnb\]+" 6 } } */
+
+#define AVX10_ATTR \
+__attribute__((noinline, __target__("avx10.2"), optimize("no-trapping-math")))
+
+AVX10_ATTR
+int foo1_avx10 (__bf16 a, __bf16 b, __bf16 c, __bf16 d)
+{
+ return a == b && c < d;
+}
+
+AVX10_ATTR
+int foo2_avx10 (__bf16 a, __bf16 b, __bf16 c, __bf16 d)
+{
+ return a > b || c != d;
+}
+
+AVX10_ATTR
+int foo3_avx10 (__bf16 a, __bf16 b, __bf16 c, __bf16 d)
+{
+ return (a >= b) * (c <= d);
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-comibf-4.c b/gcc/testsuite/gcc.target/i386/avx10_2-comibf-4.c
new file mode 100644
index 00000000000..18848ddb5e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-comibf-4.c
@@ -0,0 +1,41 @@
+/* { dg-do run { target { avx10_2 } } } */
+/* { dg-options "-march=x86-64-v3 -O2" } */
+
+#include "avx10_2-comibf-3.c"
+
+__attribute__((noinline))
+int foo1 (__bf16 a, __bf16 b, __bf16 c, __bf16 d)
+{
+ return a == b && c < d;
+}
+
+__attribute__((noinline))
+int foo2 (__bf16 a, __bf16 b, __bf16 c, __bf16 d)
+{
+ return a > b || c != d;
+}
+
+__attribute__((noinline))
+int foo3 (__bf16 a, __bf16 b, __bf16 c, __bf16 d)
+{
+ return (a >= b) * (c <= d);
+}
+
+
+int main (void)
+{
+ if (!__builtin_cpu_supports ("avx10.2"))
+ return 0;
+
+ __bf16 a = 0.5bf16, b = -0.25bf16, c = 1.75bf16, d = -0.125bf16;
+
+ if (foo1_avx10 (a, b, c, d) != foo1 (a, b, c, d))
+ __builtin_abort ();
+
+ if (foo2_avx10 (b, c, d, a) != foo2 (b, c, d, a))
+ __builtin_abort ();
+
+ if (foo3_avx10 (c, d, a, b) != foo3 (c, d, a, b))
+ __builtin_abort ();
+}
+
--
2.31.1