[PATCH v4] RISC-V: Fix register overlap issue for some xtheadvector instructions

Jun Sha (Joshua) Tue, 02 Jan 2024 23:54:52 -0800

For th.vmadc/th.vmsbc as well as narrowing arithmetic instructions
and floating-point compare instructions, an illegal instruction
exception will be raised if the destination vector register overlaps
a source vector register group.


To handle this issue, we use the "group_overlap" and "enabled" attributes
to disable the affected alternatives for xtheadvector.

gcc/ChangeLog:

        * config/riscv/riscv.md (group_overlap): Add "th" value.
        (group_overlap_valid): Mark "th" alternatives as not valid
        for xtheadvector.
        * config/riscv/vector.md: Disable alternatives where the
        destination register overlaps a source register group for
        xtheadvector.

Co-authored-by: Jin Ma <ji...@linux.alibaba.com>
Co-authored-by: Xianmiao Qu <cooper...@linux.alibaba.com>
Co-authored-by: Christoph Müllner <christoph.muell...@vrull.eu>
---
 gcc/config/riscv/riscv.md  |   6 +-
 gcc/config/riscv/vector.md | 314 +++++++++++++++++++++----------------
 2 files changed, 185 insertions(+), 135 deletions(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 68f7203b676..d736501784d 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -504,7 +504,7 @@
 ;; Widening instructions have group-overlap constraints.  Those are only
 ;; valid for certain register-group sizes.  This attribute marks the
 ;; alternatives not matching the required register-group size as disabled.
-(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0"
+(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0,th"
   (const_string "none"))
 
 (define_attr "group_overlap_valid" "no,yes"
@@ -543,6 +543,10 @@
          (and (eq_attr "group_overlap" "W0")
              (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) > 1"))
         (const_string "no")
+
+         (and (eq_attr "group_overlap" "th")
+             (match_test "TARGET_XTHEADVECTOR"))
+        (const_string "no")
         ]
        (const_string "yes")))
 
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index cb30c9ae97c..63d0573d4aa 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3248,7 +3248,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "th,none,none")])
 
 (define_insn "@pred_msbc<mode>"
   [(set (match_operand:<VM> 0 "register_operand"        "=vr, vr, &vr")
@@ -3267,7 +3268,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "th,th,none")])
 
 (define_insn "@pred_madc<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr")
@@ -3287,7 +3289,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "th,none")])
 
 (define_insn "@pred_msbc<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr")
@@ -3307,7 +3310,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "th,none")])
 
 (define_expand "@pred_madc<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -3356,7 +3360,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "th,none")])
 
 (define_insn "*pred_madc<mode>_extended_scalar"
   [(set (match_operand:<VM> 0 "register_operand"             "=vr, &vr")
@@ -3377,7 +3382,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "th,none")])
 
 (define_expand "@pred_msbc<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -3426,7 +3432,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "th,none")])
 
 (define_insn "*pred_msbc<mode>_extended_scalar"
   [(set (match_operand:<VM> 0 "register_operand"              "=vr, &vr")
@@ -3447,7 +3454,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "group_overlap" "th,none")])
 
 (define_insn "@pred_madc<mode>_overflow"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr, &vr")
@@ -3465,7 +3473,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "th,none,none")])
 
 (define_insn "@pred_msbc<mode>_overflow"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, vr, &vr, &vr")
@@ -3483,7 +3492,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "th,th,none,none")])
 
 (define_insn "@pred_madc<mode>_overflow_scalar"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr")
@@ -3502,7 +3512,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "th,none")])
 
 (define_insn "@pred_msbc<mode>_overflow_scalar"
   [(set (match_operand:<VM> 0 "register_operand"         "=vr, &vr")
@@ -3521,7 +3532,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "th,none")])
 
 (define_expand "@pred_madc<mode>_overflow_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -3568,7 +3580,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "th,none")])
 
 (define_insn "*pred_madc<mode>_overflow_extended_scalar"
   [(set (match_operand:<VM> 0 "register_operand"             "=vr, &vr")
@@ -3588,7 +3601,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "th,none")])
 
 (define_expand "@pred_msbc<mode>_overflow_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -3635,7 +3649,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "th,none")])
 
 (define_insn "*pred_msbc<mode>_overflow_extended_scalar"
   [(set (match_operand:<VM> 0 "register_operand"             "=vr, &vr")
@@ -3655,7 +3670,8 @@
   [(set_attr "type" "vicalu")
    (set_attr "mode" "<MODE>")
    (set_attr "vl_op_idx" "3")
-   (set (attr "avl_type_idx") (const_int 4))])
+   (set (attr "avl_type_idx") (const_int 4))
+   (set_attr "group_overlap" "th,none")])
 
 ;; -------------------------------------------------------------------------------
 ;; ---- Predicated integer unary operations
@@ -3975,7 +3991,8 @@
   "TARGET_VECTOR"
   "vn<insn>.w%o4\t%0,%3,%v4%p1"
   [(set_attr "type" "vnshift")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "none,none,th,th,none,th,none,none,none,th,none,none")])
 
 (define_insn "@pred_narrow_<optab><mode>_scalar"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, 
vd, vr, vr,  &vr,  &vr")
@@ -3996,7 +4013,8 @@
   "TARGET_VECTOR"
   "vn<insn>.w%o4\t%0,%3,%4%p1"
   [(set_attr "type" "vnshift")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "none,none,th,th,none,none")])
 
 ;; vncvt.x.x.w
 (define_insn "@pred_trunc<mode>"
@@ -4020,7 +4038,8 @@
    (set_attr "vl_op_idx" "4")
    (set (attr "ta") (symbol_ref "riscv_vector::get_ta(operands[5])"))
    (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
-   (set (attr "avl_type_idx") (const_int 7))])
+   (set (attr "avl_type_idx") (const_int 7))
+   (set_attr "group_overlap" "none,none,th,th,none,none")])
 
 ;; -------------------------------------------------------------------------------
 ;; ---- Predicated fixed-point operations
@@ -4426,7 +4445,8 @@
   "TARGET_VECTOR"
   "vnclip<v_su>.w%o4\t%0,%3,%v4%p1"
   [(set_attr "type" "vnclip")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "th,th,th,th,th,th,none,none,th,th,none,none")])
 
 (define_insn "@pred_narrow_clip<v_su><mode>_scalar"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"           "=vd, 
vd, vr, vr,  &vr,  &vr")
@@ -4448,7 +4468,8 @@
   "TARGET_VECTOR"
   "vnclip<v_su>.w%o4\t%0,%3,%4%p1"
   [(set_attr "type" "vnclip")
-   (set_attr "mode" "<V_DOUBLE_TRUNC>")])
+   (set_attr "mode" "<V_DOUBLE_TRUNC>")
+   (set_attr "group_overlap" "th,th,th,th,none,none")])
 
 ;; -------------------------------------------------------------------------------
 ;; ---- Predicated integer comparison operations
@@ -4499,23 +4520,24 @@
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_cmp<mode>"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   
vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   
vr,   vr,   &vr,   &vr,   &vr,   &vr")
        (if_then_else:<VM>
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"      
"vmWc1,vmWc1,vmWc1,vmWc1")
-            (match_operand 6 "vector_length_operand"         "   rK,   rK,   
rK,   rK")
-            (match_operand 7 "const_int_operand"             "    i,    i,    
i,    i")
-            (match_operand 8 "const_int_operand"             "    i,    i,    
i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"      
"vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 6 "vector_length_operand"         "   rK,   rK,   
rK,   rK,   rK,   rK,   rK,   rK")
+            (match_operand 7 "const_int_operand"             "    i,    i,    
i,    i,    i,    i,    i,    i")
+            (match_operand 8 "const_int_operand"             "    i,    i,    
i,    i,    i,    i,    i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (match_operator:<VM> 3 "comparison_except_ltge_operator"
-            [(match_operand:V_VLSI 4 "register_operand"          "   vr,   vr, 
  vr,   vr")
-             (match_operand:V_VLSI 5 "vector_arith_operand"      "   vr,   vr, 
  vi,   vi")])
-         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,   
vu,    0")))]
+            [(match_operand:V_VLSI 4 "register_operand"          "   vr,   vr, 
  vr,   vr,   vr,   vr,   vr,   vr")
+             (match_operand:V_VLSI 5 "vector_arith_operand"      "   vr,   vr, 
  vi,   vi,   vr,   vr,   vi,   vi")])
+         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,   
vu,    0,   vu,    0,   vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.v%o5\t%0,%4,%v5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "th,th,th,th,none,none,none,none")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_cmp<mode>_narrow"
@@ -4535,7 +4557,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.v%o5\t%0,%4,%v5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,th,th,th,th,th,th,none,none")])
 
 (define_expand "@pred_ltge<mode>"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -4579,23 +4602,24 @@
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_ltge<mode>"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   
vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   
vr,   vr,   &vr,   &vr,   &vr,   &vr")
        (if_then_else:<VM>
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"      
"vmWc1,vmWc1,vmWc1,vmWc1")
-            (match_operand 6 "vector_length_operand"         "   rK,   rK,   
rK,   rK")
-            (match_operand 7 "const_int_operand"             "    i,    i,    
i,    i")
-            (match_operand 8 "const_int_operand"             "    i,    i,    
i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"      
"vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 6 "vector_length_operand"         "   rK,   rK,   
rK,   rK,   rK,   rK,   rK,   rK")
+            (match_operand 7 "const_int_operand"             "    i,    i,    
i,    i,    i,    i,    i,    i")
+            (match_operand 8 "const_int_operand"             "    i,    i,    
i,    i,    i,    i,    i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (match_operator:<VM> 3 "ltge_operator"
-            [(match_operand:V_VLSI 4 "register_operand"          "   vr,   vr, 
  vr,   vr")
-             (match_operand:V_VLSI 5 "vector_neg_arith_operand"  "   vr,   vr, 
  vj,   vj")])
-         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,   
vu,    0")))]
+            [(match_operand:V_VLSI 4 "register_operand"          "   vr,   vr, 
  vr,   vr,   vr,   vr,   vr,   vr")
+             (match_operand:V_VLSI 5 "vector_neg_arith_operand"  "   vr,   vr, 
  vj,   vj,   vr,   vr,   vj,   vj")])
+         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,   
vu,    0,    vu,    0,   vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.v%o5\t%0,%4,%v5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "th,th,th,th,none,none,none,none")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_ltge<mode>_narrow"
@@ -4615,7 +4639,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.v%o5\t%0,%4,%v5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,th,th,th,th,th,th,none,none")])
 
 (define_expand "@pred_cmp<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -4661,24 +4686,25 @@
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_cmp<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   
&vr,   &vr")
        (if_then_else:<VM>
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-            (match_operand 6 "vector_length_operand"         "   rK,   rK")
-            (match_operand 7 "const_int_operand"             "    i,    i")
-            (match_operand 8 "const_int_operand"             "    i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"      
"vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 6 "vector_length_operand"         "   rK,   rK,   
rK,   rK")
+            (match_operand 7 "const_int_operand"             "    i,    i,    
i,    i")
+            (match_operand 8 "const_int_operand"             "    i,    i,    
i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (match_operator:<VM> 3 "comparison_except_eqge_operator"
-            [(match_operand:V_VLSI_QHS 4 "register_operand"      "   vr,   vr")
+            [(match_operand:V_VLSI_QHS 4 "register_operand"      "   vr,   vr, 
  vr,   vr")
              (vec_duplicate:V_VLSI_QHS
-               (match_operand:<VEL> 5 "register_operand"     "    r,    r"))])
-         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+               (match_operand:<VEL> 5 "register_operand"     "    r,    r,    
r,    r"))])
+         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    
vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "th,th,none,none")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_cmp<mode>_scalar_narrow"
@@ -4699,7 +4725,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,th,th,none,none")])
 
 (define_expand "@pred_eqne<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -4745,24 +4772,25 @@
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_eqne<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   
&vr,   &vr")
        (if_then_else:<VM>
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-            (match_operand 6 "vector_length_operand"         "   rK,   rK")
-            (match_operand 7 "const_int_operand"             "    i,    i")
-            (match_operand 8 "const_int_operand"             "    i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"      
"vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 6 "vector_length_operand"         "   rK,   rK,   
rK,   rK")
+            (match_operand 7 "const_int_operand"             "    i,    i,    
i,    i")
+            (match_operand 8 "const_int_operand"             "    i,    i,    
i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (match_operator:<VM> 3 "equality_operator"
             [(vec_duplicate:V_VLSI_QHS
-               (match_operand:<VEL> 5 "register_operand"     "    r,    r"))
-             (match_operand:V_VLSI_QHS 4 "register_operand"      "   vr,   
vr")])
-         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+               (match_operand:<VEL> 5 "register_operand"     "    r,    r,    
r,    r"))
+             (match_operand:V_VLSI_QHS 4 "register_operand"      "   vr,   vr, 
  vr,   vr")])
+         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    
vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "th,th,none,none")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_eqne<mode>_scalar_narrow"
@@ -4783,7 +4811,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,th,th,none,none")])
 
 ;; Handle GET_MODE_INNER (mode) = DImode. We need to split them since
 ;; we need to deal with SEW = 64 in RV32 system.
@@ -4910,24 +4939,25 @@
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_cmp<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   
&vr,   &vr")
        (if_then_else:<VM>
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-            (match_operand 6 "vector_length_operand"         "   rK,   rK")
-            (match_operand 7 "const_int_operand"             "    i,    i")
-            (match_operand 8 "const_int_operand"             "    i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"      
"vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 6 "vector_length_operand"         "   rK,   rK,   
rK,   rK")
+            (match_operand 7 "const_int_operand"             "    i,    i,    
i,    i")
+            (match_operand 8 "const_int_operand"             "    i,    i,    
i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (match_operator:<VM> 3 "comparison_except_eqge_operator"
-            [(match_operand:V_VLSI_D 4 "register_operand"        "   vr,   vr")
+            [(match_operand:V_VLSI_D 4 "register_operand"        "   vr,   vr, 
  vr,   vr")
              (vec_duplicate:V_VLSI_D
-               (match_operand:<VEL> 5 "register_operand"     "    r,    r"))])
-         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+               (match_operand:<VEL> 5 "register_operand"     "    r,    r,    
r,    r"))])
+         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    
vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "th,th,none,none")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_cmp<mode>_scalar_narrow"
@@ -4948,28 +4978,30 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,th,th,none,none")])
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_eqne<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   
&vr,   &vr")
        (if_then_else:<VM>
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-            (match_operand 6 "vector_length_operand"         "   rK,   rK")
-            (match_operand 7 "const_int_operand"             "    i,    i")
-            (match_operand 8 "const_int_operand"             "    i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"      
"vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 6 "vector_length_operand"         "   rK,   rK,   
rK,   rK")
+            (match_operand 7 "const_int_operand"             "    i,    i,    
i,    i")
+            (match_operand 8 "const_int_operand"             "    i,    i,    
i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (match_operator:<VM> 3 "equality_operator"
             [(vec_duplicate:V_VLSI_D
-               (match_operand:<VEL> 5 "register_operand"     "    r,    r"))
-             (match_operand:V_VLSI_D 4 "register_operand"        "   vr,   
vr")])
-         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+               (match_operand:<VEL> 5 "register_operand"     "    r,    r,    
r,    r"))
+             (match_operand:V_VLSI_D 4 "register_operand"        "   vr,   vr, 
  vr,   vr")])
+         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    
vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "th,th,none,none")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_eqne<mode>_scalar_narrow"
@@ -4990,7 +5022,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,th,th,none,none")])
 
 (define_insn "*pred_cmp<mode>_extended_scalar_merge_tie_mask"
   [(set (match_operand:<VM> 0 "register_operand"               "=vm")
@@ -5019,25 +5052,26 @@
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_cmp<mode>_extended_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                 "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                 "=vr,   vr,   
&vr,   &vr")
        (if_then_else:<VM>
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1")
-            (match_operand 6 "vector_length_operand"          "   rK,   rK")
-            (match_operand 7 "const_int_operand"              "    i,    i")
-            (match_operand 8 "const_int_operand"              "    i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"       
"vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 6 "vector_length_operand"          "   rK,   rK,   
rK,   rK")
+            (match_operand 7 "const_int_operand"              "    i,    i,    
i,    i")
+            (match_operand 8 "const_int_operand"              "    i,    i,    
i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (match_operator:<VM> 3 "comparison_except_eqge_operator"
-            [(match_operand:V_VLSI_D 4 "register_operand"         "   vr,   
vr")
+            [(match_operand:V_VLSI_D 4 "register_operand"         "   vr,   
vr,   vr,   vr")
              (vec_duplicate:V_VLSI_D
                (sign_extend:<VEL>
-                 (match_operand:<VSUBEL> 5 "register_operand" "    r,    
r")))])
-         (match_operand:<VM> 2 "vector_merge_operand"         "   vu,    0")))]
+                 (match_operand:<VSUBEL> 5 "register_operand" "    r,    r,    
r,    r")))])
+         (match_operand:<VM> 2 "vector_merge_operand"         "   vu,    0,    
vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode) && !TARGET_64BIT"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "th,th,none,none")])
 
 (define_insn "*pred_cmp<mode>_extended_scalar_narrow"
   [(set (match_operand:<VM> 0 "register_operand"                 "=vm,   vr,   
vr,  &vr,  &vr")
@@ -5058,7 +5092,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode) && !TARGET_64BIT"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,th,th,none,none")])
 
 (define_insn "*pred_eqne<mode>_extended_scalar_merge_tie_mask"
   [(set (match_operand:<VM> 0 "register_operand"                 "=vm")
@@ -5087,25 +5122,26 @@
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_eqne<mode>_extended_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                 "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                 "=vr,   vr,   
&vr,   &vr")
        (if_then_else:<VM>
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1")
-            (match_operand 6 "vector_length_operand"          "   rK,   rK")
-            (match_operand 7 "const_int_operand"              "    i,    i")
-            (match_operand 8 "const_int_operand"              "    i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"       
"vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 6 "vector_length_operand"          "   rK,   rK,   
rK,   rK")
+            (match_operand 7 "const_int_operand"              "    i,    i,    
i,    i")
+            (match_operand 8 "const_int_operand"              "    i,    i,    
i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (match_operator:<VM> 3 "equality_operator"
             [(vec_duplicate:V_VLSI_D
                (sign_extend:<VEL>
-                 (match_operand:<VSUBEL> 5 "register_operand" "    r,    r")))
-             (match_operand:V_VLSI_D 4 "register_operand"         "   vr,   
vr")])
-         (match_operand:<VM> 2 "vector_merge_operand"         "   vu,    0")))]
+                 (match_operand:<VSUBEL> 5 "register_operand" "    r,    r,    
r,    r")))
+             (match_operand:V_VLSI_D 4 "register_operand"         "   vr,   
vr,   vr,   vr")])
+         (match_operand:<VM> 2 "vector_merge_operand"         "   vu,    0,    
vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode) && !TARGET_64BIT"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "th,th,none,none")])
 
 (define_insn "*pred_eqne<mode>_extended_scalar_narrow"
   [(set (match_operand:<VM> 0 "register_operand"                "=vm,   vr,   
vr,  &vr,  &vr")
@@ -5126,7 +5162,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode) && !TARGET_64BIT"
   "vms%B3.vx\t%0,%4,%5%p1"
   [(set_attr "type" "vicmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,th,th,none,none")])
 
 ;; GE, vmsge.vx/vmsgeu.vx
 ;;
@@ -7315,23 +7352,24 @@
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_cmp<mode>"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   
&vr,   &vr")
        (if_then_else:<VM>
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-            (match_operand 6 "vector_length_operand"         "   rK,   rK")
-            (match_operand 7 "const_int_operand"             "    i,    i")
-            (match_operand 8 "const_int_operand"             "    i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"      
"vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 6 "vector_length_operand"         "   rK,   rK,   
rK,   rK")
+            (match_operand 7 "const_int_operand"             "    i,    i,    
i,    i")
+            (match_operand 8 "const_int_operand"             "    i,    i,    
i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (match_operator:<VM> 3 "signed_order_operator"
-            [(match_operand:V_VLSF 4 "register_operand"      "   vr,   vr")
-             (match_operand:V_VLSF 5 "register_operand"      "   vr,   vr")])
-         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+            [(match_operand:V_VLSF 4 "register_operand"      "   vr,   vr,   
vr,   vr")
+             (match_operand:V_VLSF 5 "register_operand"      "   vr,   vr,   
vr,   vr")])
+         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    
vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vmf%B3.vv\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "th,th,none,none")])
 
 (define_insn "*pred_cmp<mode>_narrow_merge_tie_mask"
   [(set (match_operand:<VM> 0 "register_operand"               "=vm")
@@ -7374,7 +7412,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vmf%B3.vv\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,th,th,th,th,th,th,none,none")])
 
 (define_expand "@pred_cmp<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -7420,24 +7459,25 @@
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_cmp<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
        (if_then_else:<VM>
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-            (match_operand 6 "vector_length_operand"         "   rK,   rK")
-            (match_operand 7 "const_int_operand"             "    i,    i")
-            (match_operand 8 "const_int_operand"             "    i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+            (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+            (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (match_operator:<VM> 3 "signed_order_operator"
-            [(match_operand:V_VLSF 4 "register_operand"      "   vr,   vr")
+            [(match_operand:V_VLSF 4 "register_operand"      "   vr,   vr,   vr,   vr")
              (vec_duplicate:V_VLSF
-               (match_operand:<VEL> 5 "register_operand"     "    f,    f"))])
-         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+               (match_operand:<VEL> 5 "register_operand"     "    f,    f,    f,    f"))])
+         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vmf%B3.vf\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "th,th,none,none")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_cmp<mode>_scalar_narrow"
@@ -7458,7 +7498,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vmf%B3.vf\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,th,th,none,none")])
 
 (define_expand "@pred_eqne<mode>_scalar"
   [(set (match_operand:<VM> 0 "register_operand")
@@ -7504,24 +7545,25 @@
 
 ;; We don't use early-clobber for LMUL <= 1 to get better codegen.
 (define_insn "*pred_eqne<mode>_scalar"
-  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr")
+  [(set (match_operand:<VM> 0 "register_operand"                "=vr,   vr,   &vr,   &vr")
        (if_then_else:<VM>
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-            (match_operand 6 "vector_length_operand"         "   rK,   rK")
-            (match_operand 7 "const_int_operand"             "    i,    i")
-            (match_operand 8 "const_int_operand"             "    i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 6 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+            (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
+            (match_operand 8 "const_int_operand"             "    i,    i,    i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (match_operator:<VM> 3 "equality_operator"
             [(vec_duplicate:V_VLSF
-               (match_operand:<VEL> 5 "register_operand"     "    f,    f"))
-             (match_operand:V_VLSF 4 "register_operand"      "   vr,   vr")])
-         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0")))]
+               (match_operand:<VEL> 5 "register_operand"     "    f,    f,    f,    f"))
+             (match_operand:V_VLSF 4 "register_operand"      "   vr,   vr,   vr,   vr")])
+         (match_operand:<VM> 2 "vector_merge_operand"        "   vu,    0,    vu,    0")))]
   "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
   "vmf%B3.vf\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "th,th,none,none")])
 
 ;; We use early-clobber for source LMUL > dest LMUL.
 (define_insn "*pred_eqne<mode>_scalar_narrow"
@@ -7542,7 +7584,8 @@
   "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
   "vmf%B3.vf\t%0,%4,%5%p1"
   [(set_attr "type" "vfcmp")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "none,th,th,none,none")])
 
 ;; -------------------------------------------------------------------------------
 ;; ---- Predicated floating-point merge
@@ -7762,7 +7805,8 @@
   [(set_attr "type" "vfncvtftoi")
    (set_attr "mode" "<VNCONVERT>")
    (set (attr "frm_mode")
-       (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
+       (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))
+   (set_attr "group_overlap" "none,none,th,th,none,none")])
 
 (define_insn "@pred_narrow_<fix_cvt><mode>"
   [(set (match_operand:<VNCONVERT> 0 "register_operand"        "=vd, vd, vr, vr,  &vr,  &vr")
@@ -7804,7 +7848,8 @@
   [(set_attr "type" "vfncvtitof")
    (set_attr "mode" "<VNCONVERT>")
    (set (attr "frm_mode")
-       (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
+       (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))
+   (set_attr "group_overlap" "none,none,th,th,none,none")])
 
 (define_insn "@pred_trunc<mode>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"       "=vd, vd, vr, vr,  &vr,  &vr")
@@ -7827,7 +7872,8 @@
   [(set_attr "type" "vfncvtftof")
    (set_attr "mode" "<V_DOUBLE_TRUNC>")
    (set (attr "frm_mode")
-       (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
+       (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))
+   (set_attr "group_overlap" "none,none,th,th,none,none")])
 
 (define_insn "@pred_rod_trunc<mode>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"       "=vd, vd, vr, vr,  &vr,  &vr")
-- 
2.17.1

[PATCH v4] RISC-V: Fix register overlap issue for some xtheadvector instructions

Reply via email to