https://gcc.gnu.org/g:6c1de786e53a11150feb16ba990d0d6c6fd910db

commit r15-582-g6c1de786e53a11150feb16ba990d0d6c6fd910db
Author: Pan Li <pan2...@intel.com>
Date:   Thu May 16 10:02:40 2024 +0800

    RISC-V: Implement vectorizable early exit with vcond_mask_len
    
    Now that loop lens are supported for vectorizable early exit, we would
    like to implement the feature for the RISC-V target.  Given the example
    below:
    
    unsigned vect_a[1923];
    unsigned vect_b[1923];
    
    /* For each i < n store limit + i into vect_b[i] and limit into
       vect_a[i], but leave the loop early, returning vect_b[i], as soon
       as vect_a[i] > limit.  Returns 0 when no element triggers the
       early exit.  */
    unsigned test (unsigned limit, int n)
    {
      unsigned ret = 0;
    
      for (int i = 0; i < n; i++)
        {
          vect_b[i] = limit + i;
    
          /* Data-dependent early exit; vect_a[i] is left untouched for
             the element that triggers it.  */
          if (vect_a[i] > limit)
            {
              ret = vect_b[i];
              return ret;
            }
    
          vect_a[i] = limit;
        }
    
      return ret;
    }
    
    Before this patch:
      ...
    .L8:
      sw    a3,0(a5)
      addiw a0,a0,1
      addi  a4,a4,4
      addi  a5,a5,4
      beq   a1,a0,.L2
    .L4:
      sw    a0,0(a4)
      lw    a2,0(a5)
      bleu  a2,a3,.L8
      ret
    
    After this patch:
      ...
    .L5:
      vsetvli   a5,a3,e8,mf4,ta,ma
      vmv1r.v   v4,v2
      vsetvli   t4,zero,e32,m1,ta,ma
      vmv.v.x   v1,a5
      vadd.vv   v2,v2,v1
      vsetvli   zero,a5,e32,m1,ta,ma
      vadd.vv   v5,v4,v3
      slli      a6,a5,2
      vle32.v   v1,0(t1)
      vmsltu.vv v1,v3,v1
      vcpop.m   t4,v1
      beq       t4,zero,.L4
      vmv.x.s   a4,v4
    .L3:
      ...
    
    The following tests pass with this patch:
    1. The full riscv regression tests.
    
    gcc/ChangeLog:
    
            * config/riscv/autovec-opt.md (*vcond_mask_len_popcount_<VB_VLS:mode><P:mode>):
            New pattern of vcond_mask_len_popcount for vector bool mode.
            * config/riscv/autovec.md (vcond_mask_len_<mode>): New pattern of
            vcond_mask_len for vector bool mode.
            (cbranch<mode>4): New pattern for vector bool mode.
            * config/riscv/vector-iterators.md: Add new unspec UNSPEC_SELECT_MASK.
            * config/riscv/vector.md (@pred_popcount<VB:mode><P:mode>): Add VLS
            mode to popcount pattern.
            (@pred_popcount<VB_VLS:mode><P:mode>): Ditto.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/autovec/early-break-1.c: New test.
            * gcc.target/riscv/rvv/autovec/early-break-2.c: New test.
    
    Signed-off-by: Pan Li <pan2...@intel.com>

Diff:
---
 gcc/config/riscv/autovec-opt.md                    | 33 ++++++++++++
 gcc/config/riscv/autovec.md                        | 61 ++++++++++++++++++++++
 gcc/config/riscv/vector-iterators.md               |  1 +
 gcc/config/riscv/vector.md                         | 18 +++----
 .../gcc.target/riscv/rvv/autovec/early-break-1.c   | 34 ++++++++++++
 .../gcc.target/riscv/rvv/autovec/early-break-2.c   | 37 +++++++++++++
 6 files changed, 175 insertions(+), 9 deletions(-)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 645dc53d8680..04f85d8e4553 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1436,3 +1436,36 @@
     DONE;
   }
   [(set_attr "type" "vmalu")])
+
+;; Optimization pattern for early break auto-vectorization
+;; vcond_mask_len (mask, ones, zeros, len, bias) + vlmax popcount
+;; -> non vlmax popcount (mask, len)
+(define_insn_and_split "*vcond_mask_len_popcount_<VB_VLS:mode><P:mode>"
+  [(set (match_operand:P 0 "register_operand")
+    (popcount:P
+     (unspec:VB_VLS [
+      (unspec:VB_VLS [
+       (match_operand:VB_VLS 1 "register_operand")
+       (match_operand:VB_VLS 2 "const_1_operand")
+       (match_operand:VB_VLS 3 "const_0_operand")
+       (match_operand 4 "autovec_length_operand")
+       (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK)
+      (match_operand 6 "autovec_length_operand")
+      (const_int 1)
+      (reg:SI VL_REGNUM)
+      (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
+  "TARGET_VECTOR
+   && can_create_pseudo_p ()
+   && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS 
(<VB_VLS:MODE>mode)).exists ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    riscv_vector::emit_nonvlmax_insn (
+       code_for_pred_popcount (<VB_VLS:MODE>mode, Pmode),
+       riscv_vector::CPOP_OP,
+       operands, operands[4]);
+    DONE;
+  }
+  [(set_attr "type" "vector")]
+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index aa1ae0fe075b..1ee3c8052fb4 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2612,3 +2612,64 @@
     DONE;
   }
 )
+
+;; =========================================================================
+;; == Early break auto-vectorization patterns
+;; =========================================================================
+
+;; vcond_mask_len (mask, 1s, 0s, len, bias)
+;; => mask[i] = mask[i] && i < len ? 1 : 0
+(define_insn_and_split "vcond_mask_len_<mode>"
+  [(set (match_operand:VB 0 "register_operand")
+    (unspec: VB [
+     (match_operand:VB 1 "register_operand")
+     (match_operand:VB 2 "const_1_operand")
+     (match_operand:VB 3 "const_0_operand")
+     (match_operand 4 "autovec_length_operand")
+     (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK))]
+  "TARGET_VECTOR
+   && can_create_pseudo_p ()
+   && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS 
(<MODE>mode)).exists ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    machine_mode mode = riscv_vector::get_vector_mode (Pmode,
+                       GET_MODE_NUNITS (<MODE>mode)).require ();
+    rtx reg = gen_reg_rtx (mode);
+    riscv_vector::expand_vec_series (reg, const0_rtx, const1_rtx);
+    rtx dup_rtx = gen_rtx_VEC_DUPLICATE (mode, operands[4]);
+    insn_code icode = code_for_pred_cmp_scalar (mode);
+    rtx cmp = gen_rtx_fmt_ee (LTU, <MODE>mode, reg, dup_rtx);
+    rtx ops[] = {operands[0], operands[1], operands[1], cmp, reg, operands[4]};
+    emit_vlmax_insn (icode, riscv_vector::COMPARE_OP_MU, ops);
+    DONE;
+  }
+  [(set_attr "type" "vector")])
+
+;; cbranch
+(define_expand "cbranch<mode>4"
+  [(set (pc)
+       (if_then_else
+         (match_operator 0 "equality_operator"
+           [(match_operand:VB_VLS 1 "register_operand")
+            (match_operand:VB_VLS 2 "reg_or_0_operand")])
+         (label_ref (match_operand 3 ""))
+         (pc)))]
+  "TARGET_VECTOR"
+  {
+    rtx pred;
+    if (operands[2] == CONST0_RTX (<MODE>mode))
+      pred = operands[1];
+    else
+      pred = expand_binop (<MODE>mode, xor_optab, operands[1],
+                          operands[2], NULL_RTX, 0,
+                          OPTAB_DIRECT);
+    rtx reg = gen_reg_rtx (Pmode);
+    rtx cpop_ops[] = {reg, pred};
+    emit_vlmax_insn (code_for_pred_popcount (<MODE>mode, Pmode),
+                    riscv_vector::CPOP_OP, cpop_ops);
+    operands[1] = reg;
+    operands[2] = const0_rtx;
+  }
+)
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index a24e1bf078fb..76c27035a735 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -102,6 +102,7 @@
   UNSPEC_WREDUC_SUMU
   UNSPEC_WREDUC_SUM_ORDERED
   UNSPEC_WREDUC_SUM_UNORDERED
+  UNSPEC_SELECT_MASK
 ])
 
 (define_c_enum "unspecv" [
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 228d0f9a7663..95451dc762b0 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -6121,21 +6121,21 @@
    (set_attr "vl_op_idx" "4")
    (set (attr "avl_type_idx") (const_int 5))])
 
-(define_insn "@pred_popcount<VB:mode><P:mode>"
-  [(set (match_operand:P 0 "register_operand"               "=r")
+(define_insn "@pred_popcount<VB_VLS:mode><P:mode>"
+  [(set (match_operand:P 0 "register_operand"                   "=r")
        (popcount:P
-         (unspec:VB
-           [(and:VB
-              (match_operand:VB 1 "vector_mask_operand" "vmWc1")
-              (match_operand:VB 2 "register_operand"    "   vr"))
-            (match_operand 3 "vector_length_operand"    "   rK")
-            (match_operand 4 "const_int_operand"        "    i")
+         (unspec:VB_VLS
+           [(and:VB_VLS
+              (match_operand:VB_VLS 1 "vector_mask_operand" "vmWc1")
+              (match_operand:VB_VLS 2 "register_operand"    "   vr"))
+            (match_operand 3 "vector_length_operand"        "   rK")
+            (match_operand 4 "const_int_operand"            "    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
   "TARGET_VECTOR"
   "vcpop.m\t%0,%2%p1"
   [(set_attr "type" "vmpop")
-   (set_attr "mode" "<VB:MODE>")])
+   (set_attr "mode" "<VB_VLS:MODE>")])
 
 (define_insn "@pred_ffs<VB:mode><P:mode>"
   [(set (match_operand:P 0 "register_operand"                 "=r")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
new file mode 100644
index 000000000000..f70979e81f11
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 803
+
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+/*
+** test:
+** ...
+** vmsltu\.vv\s+v[0-9]+\s*,v[0-9]+,\s*v[0-9]+
+** vcpop\.m\s+[atx][0-9]+\s*,v[0-9]+
+** ...
+*/
+unsigned test (unsigned x, int n)
+{
+  unsigned ret = 0;
+
+  for (int i = 0; i < n; i++)
+    {
+      vect_b[i] = x + i;
+
+      if (vect_a[i] > x)
+        break;
+
+      vect_a[i] = x;
+    }
+
+  return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c
new file mode 100644
index 000000000000..d405783d2c43
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1728
+
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+/*
+** test:
+** ...
+** vmsltu\.vv\s+v[0-9]+\s*,v[0-9]+,\s*v[0-9]+
+** vcpop\.m\s+[atx][0-9]+\s*,v[0-9]+
+** ...
+*/
+unsigned test (unsigned limit, int n)
+{
+  unsigned ret = 0;
+
+  for (int i = 0; i < n; i++)
+    {
+      vect_b[i] = limit + i;
+
+      if (vect_a[i] > limit)
+       {
+         ret = vect_b[i];
+         return ret;
+       }
+
+      vect_a[i] = limit;
+    }
+
+  return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */

Reply via email to