On 7/3/23 10:45, juzhe.zh...@rivai.ai wrote:
> We can apply it but not sure why the patchwork shows it's rejected.

I believe it also failed for me locally because the order of
patterns in autovec-opt.md was somehow different.  The one attached
worked for me though after some minor merge adjustments on my branch.

Regards
 Robin

>From 29b12a473a31b2caa64fa2d1d97920a460ced0a2 Mon Sep 17 00:00:00 2001
From: Juzhe-Zhong <juzhe.zh...@rivai.ai>
Date: Wed, 28 Jun 2023 12:15:12 +0800
Subject: [PATCH] RISC-V: Support vfwmul.vv combine lowering

Consider the following complicated case:
#define TEST_TYPE(TYPE1, TYPE2)                                                \
  __attribute__ ((noipa)) void vwadd_##TYPE1_##TYPE2 (                         \
    TYPE1 *__restrict dst, TYPE1 *__restrict dst2, TYPE1 *__restrict dst3,     \
    TYPE1 *__restrict dst4, TYPE2 *__restrict a, TYPE2 *__restrict b,          \
    TYPE2 *__restrict a2, TYPE2 *__restrict b2, int n)                         \
  {                                                                            \
    for (int i = 0; i < n; i++)                                                \
      {                                                                        \
        dst[i] = (TYPE1) a[i] * (TYPE1) b[i];                                  \
        dst2[i] = (TYPE1) a2[i] * (TYPE1) b[i];                                \
        dst3[i] = (TYPE1) a2[i] * (TYPE1) a[i];                                \
        dst4[i] = (TYPE1) a[i] * (TYPE1) b2[i];                                \
      }                                                                        \
  }

TEST_TYPE (double, float)

In such a complicated situation, the combine pass cannot combine the
extensions of both operands in a single step.
Instead, it first combines one of the extensions into the multiplication
and then combines the other. The combine flow is as follows:

Original IR:
(set (reg 0) (float_extend: (reg 1)))
(set (reg 3) (float_extend: (reg 2)))
(set (reg 4) (mult: (reg 0) (reg 3)))

First step of combine:
(set (reg 3) (float_extend: (reg 2)))
(set (reg 4) (mult: (float_extend: (reg 1)) (reg 3)))

Second step of combine:
(set (reg 4) (mult: (float_extend: (reg 1)) (float_extend: (reg 2))))

So, to enhance the combine optimization, we add a "pseudo vfwmul.wv" RTL
pattern in autovec-opt.md, which is
(set (reg 0) (mult (float_extend (reg 1)) (reg 2))).

gcc/ChangeLog:

        * config/riscv/autovec-opt.md 
(@pred_single_widen_mul<any_extend:su><mode>): Change "@" into "*" in pattern 
name which simplifies build files.
        (*pred_single_widen_mul<any_extend:su><mode>): Ditto.
        (*pred_single_widen_mul<mode>): New pattern.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/widen/widen-3.c: Add floating-point.
        * gcc.target/riscv/rvv/autovec/widen/widen-7.c: Ditto.
        * gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c: Ditto.
        * gcc.target/riscv/rvv/autovec/widen/widen_run-3.c: Ditto.
        * gcc.target/riscv/rvv/autovec/widen/widen_run-7.c: Ditto.
        * gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c: New test.
        * gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c: New test.
---
 gcc/config/riscv/autovec-opt.md               | 39 +++++++++++++++++++
 .../riscv/rvv/autovec/widen/widen-3.c         |  7 +++-
 .../riscv/rvv/autovec/widen/widen-7.c         |  7 +++-
 .../rvv/autovec/widen/widen-complicate-3.c    |  7 +++-
 .../riscv/rvv/autovec/widen/widen_run-3.c     |  5 ++-
 .../riscv/rvv/autovec/widen/widen_run-7.c     |  5 ++-
 .../rvv/autovec/widen/widen_run_zvfh-3.c      | 28 +++++++++++++
 .../rvv/autovec/widen/widen_run_zvfh-7.c      | 28 +++++++++++++
 8 files changed, 116 insertions(+), 10 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index fd9cd27f50a..99b609a99d9 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -406,6 +406,45 @@ (define_insn "*pred_extract_first_sextsi<mode>"
   [(set_attr "type" "vimovvx")
    (set_attr "mode" "<MODE>")])
 
+;; We don't have vfwmul.wv instruction like vfwadd.wv in RVV.
+;; This pattern is an intermediate RTL IR as a pseudo vfwmul.wv to enhance
+;; optimization of instructions combine.
+(define_insn_and_split "*pred_single_widen_mul<mode>"
+  [(set (match_operand:VWEXTF 0 "register_operand"                  "=&vr,  
&vr")
+       (if_then_else:VWEXTF
+         (unspec:<VM>
+           [(match_operand:<VM> 1 "vector_mask_operand"           
"vmWc1,vmWc1")
+            (match_operand 5 "vector_length_operand"              "   rK,   
rK")
+            (match_operand 6 "const_int_operand"                  "    i,    
i")
+            (match_operand 7 "const_int_operand"                  "    i,    
i")
+            (match_operand 8 "const_int_operand"                  "    i,    
i")
+            (match_operand 9 "const_int_operand"                  "    i,    
i")
+            (reg:SI VL_REGNUM)
+            (reg:SI VTYPE_REGNUM)
+            (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
+         (mult:VWEXTF
+           (float_extend:VWEXTF
+             (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "   vr,   
vr"))
+           (match_operand:VWEXTF 3 "register_operand"             "   vr,   
vr"))
+         (match_operand:VWEXTF 2 "vector_merge_operand"           "   vu,    
0")))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    insn_code icode = code_for_pred_extend (<MODE>mode);
+    rtx tmp = gen_reg_rtx (<MODE>mode);
+    rtx ops[] = {tmp, operands[4]};
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ops);
+
+    emit_insn (gen_pred (MULT, <MODE>mode, operands[0], operands[1], 
operands[2],
+                        operands[3], tmp, operands[5], operands[6],
+                        operands[7], operands[8], operands[9]));
+    DONE;
+  }
+  [(set_attr "type" "vfwmul")
+   (set_attr "mode" "<MODE>")])
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] VFWMACC
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c
index 609a5c09f70..b2b14405902 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d 
--param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d 
--param=riscv-autovec-preference=scalable -ffast-math" } */
 
 #include <stdint-gcc.h>
 
@@ -19,9 +19,12 @@
   TEST_TYPE (int32_t, int16_t)                                                 
\
   TEST_TYPE (uint32_t, uint16_t)                                               
\
   TEST_TYPE (int64_t, int32_t)                                                 
\
-  TEST_TYPE (uint64_t, uint32_t)
+  TEST_TYPE (uint64_t, uint32_t)                                               
\
+  TEST_TYPE (float, _Float16)                                                  
\
+  TEST_TYPE (double, float)
 
 TEST_ALL ()
 
 /* { dg-final { scan-assembler-times {\tvwmul\.vv} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwmulu\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfwmul\.vv} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-7.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-7.c
index cc43d9ba3fe..3806e8b98ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-7.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d 
--param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d 
--param=riscv-autovec-preference=scalable -ffast-math" } */
 
 #include <stdint-gcc.h>
 
@@ -19,9 +19,12 @@
   TEST_TYPE (int32_t, int16_t)                                                 
\
   TEST_TYPE (uint32_t, uint16_t)                                               
\
   TEST_TYPE (int64_t, int32_t)                                                 
\
-  TEST_TYPE (uint64_t, uint32_t)
+  TEST_TYPE (uint64_t, uint32_t)                                               
\
+  TEST_TYPE (float, _Float16)                                                  
\
+  TEST_TYPE (double, float)
 
 TEST_ALL ()
 
 /* { dg-final { scan-assembler-times {\tvsext\.vf2} 3 } } */
 /* { dg-final { scan-assembler-times {\tvzext\.vf2} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfwcvt} 2 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c
index e1fd79430c3..1515374890d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d 
--param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d 
--param=riscv-autovec-preference=scalable -ffast-math" } */
 
 #include <stdint-gcc.h>
 
@@ -24,9 +24,12 @@
   TEST_TYPE (int32_t, int16_t)                                                 
\
   TEST_TYPE (uint32_t, uint16_t)                                               
\
   TEST_TYPE (int64_t, int32_t)                                                 
\
-  TEST_TYPE (uint64_t, uint32_t)
+  TEST_TYPE (uint64_t, uint32_t)                                               
\
+  TEST_TYPE (float, _Float16)                                                  
\
+  TEST_TYPE (double, float)
 
 TEST_ALL ()
 
 /* { dg-final { scan-assembler-times {\tvwmul\.vv} 12 } } */
 /* { dg-final { scan-assembler-times {\tvwmulu\.vv} 12 } } */
+/* { dg-final { scan-assembler-times {\tvfwmul\.vv} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c
index beb0cc2b58b..b7dd60fa8e8 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable 
-ffast-math" } */
 
 #include <assert.h>
 #include "widen-3.c"
@@ -25,7 +25,8 @@
   RUN (int32_t, int16_t, -32768)                                               
\
   RUN (uint32_t, uint16_t, 65535)                                              
\
   RUN (int64_t, int32_t, -2147483648)                                          
\
-  RUN (uint64_t, uint32_t, 4294967295)
+  RUN (uint64_t, uint32_t, 4294967295)                                         
\
+  RUN (double, float, -2147483648)
 
 int
 main ()
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-7.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-7.c
index 4abddd5d718..ab29f4a0f70 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-7.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target { riscv_vector } } } */
-/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable 
-ffast-math" } */
 
 #include <assert.h>
 #include "widen-7.c"
@@ -25,7 +25,8 @@
   RUN (int32_t, int16_t, -32768)                                               
\
   RUN (uint32_t, uint16_t, 65535)                                              
\
   RUN (int64_t, int32_t, -2147483648)                                          
\
-  RUN (uint64_t, uint32_t, 4294967295)
+  RUN (uint64_t, uint32_t, 4294967295)                                         
\
+  RUN (double, float, -2147483648)
 
 int
 main ()
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c
new file mode 100644
index 00000000000..c3efd0b97bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-3.c
@@ -0,0 +1,28 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable 
-ffast-math" } */
+
+#include <assert.h>
+#include "widen-3.c"
+
+#define SZ 512
+
+#define RUN(TYPE1, TYPE2, LIMIT)                                               
\
+  TYPE2 a##TYPE2[SZ];                                                          
\
+  TYPE2 b##TYPE2[SZ];                                                          
\
+  TYPE1 dst##TYPE1[SZ];                                                        
\
+  for (int i = 0; i < SZ; i++)                                                 
\
+    {                                                                          
\
+      a##TYPE2[i] = LIMIT + i % 8723;                                          
\
+      b##TYPE2[i] = LIMIT + i & 1964;                                          
\
+    }                                                                          
\
+  vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ);                  
\
+  for (int i = 0; i < SZ; i++)                                                 
\
+    assert (dst##TYPE1[i] == ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]));
+
+#define RUN_ALL() RUN (float, _Float16, -32768)
+
+int
+main ()
+{
+  RUN_ALL ()
+}
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c
new file mode 100644
index 00000000000..60e2401c088
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run_zvfh-7.c
@@ -0,0 +1,28 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable 
-ffast-math" } */
+
+#include <assert.h>
+#include "widen-7.c"
+
+#define SZ 512
+
+#define RUN(TYPE1, TYPE2, LIMIT)                                               
\
+  TYPE2 a##TYPE2[SZ];                                                          
\
+  TYPE1 b##TYPE1[SZ];                                                          
\
+  TYPE1 dst##TYPE1[SZ];                                                        
\
+  for (int i = 0; i < SZ; i++)                                                 
\
+    {                                                                          
\
+      a##TYPE2[i] = LIMIT + i % LIMIT;                                         
\
+      b##TYPE1[i] = LIMIT + i & LIMIT;                                         
\
+    }                                                                          
\
+  vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE1, SZ);                  
\
+  for (int i = 0; i < SZ; i++)                                                 
\
+    assert (dst##TYPE1[i] == (((TYPE1) a##TYPE2[i]) * b##TYPE1[i]));
+
+#define RUN_ALL() RUN (float, _Float16, -32768)
+
+int
+main ()
+{
+  RUN_ALL ()
+}
-- 
2.41.0


Reply via email to