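This patch adds instruction patterns to support fusion of multiply-add and
bit-extraction sequences for the Synopsys RHX-100 processor.  Each pattern is
kept as a single insn until after reload and only then split into its
two-instruction sequence, which makes it more likely that the fusible
instructions are emitted next to each other.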

gcc/ChangeLog:

        * config/riscv/arcv-rhx100.md (arcv_rhx100_imul_fused): New reservation.
        (arcv_rhx100_alu_fused): New reservation.
        * config/riscv/iterators.md (is_zero_extract): New code attribute.
        * config/riscv/riscv.cc (riscv_rtx_costs): Reduce cost for zero_extract
        for RHX-100.
        * config/riscv/riscv.md: Add imul_fused and alu_fused type attributes.
        (umaddhisi4): New expand.
        (madd_fused): New insn_and_split.
        (madd_fused_extended): New insn_and_split.
        (*zero_extract_fused): New insn_and_split.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/arcv-fusion-limm-condbr.c: New test.
        * gcc.target/riscv/arcv-fusion-madd.c: New test.
        * gcc.target/riscv/arcv-fusion-xbfu.c: New test.

Co-authored-by: Artemiy Volkov <[email protected]>
Co-authored-by: Michiel Derhaeg <[email protected]>
Signed-off-by: Luis Silva <[email protected]>
---
 gcc/config/riscv/arcv-rhx100.md               |  10 ++
 gcc/config/riscv/iterators.md                 |   2 +
 gcc/config/riscv/riscv.cc                     |   3 +-
 gcc/config/riscv/riscv.md                     | 135 +++++++++++++++++-
 .../riscv/arcv-fusion-limm-condbr.c           |  12 ++
 .../gcc.target/riscv/arcv-fusion-madd.c       |  12 ++
 .../gcc.target/riscv/arcv-fusion-xbfu.c       |  14 ++
 7 files changed, 185 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c

diff --git a/gcc/config/riscv/arcv-rhx100.md b/gcc/config/riscv/arcv-rhx100.md
index c0631a17a28..7cbabac29a5 100644
--- a/gcc/config/riscv/arcv-rhx100.md
+++ b/gcc/config/riscv/arcv-rhx100.md
@@ -42,6 +42,26 @@
                condmove,mvpair,zicond,cpop,clmul"))
   "((arcv_rhx100_issueA_fuse0 + arcv_rhx100_ALU_A_fuse0_early) | (arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse1_early)) | ((arcv_rhx100_issueB_fuse0 + arcv_rhx100_ALU_B_fuse0_early) | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_ALU_B_fuse1_early))")
 
+;; A fused multiply-add pair (type imul_fused): default latency 4.  It
+;; reserves both issueA fuse slots, both ALU_A early units and MPY32 in the
+;; first cycle and nothing in the three cycles after that.
+(define_insn_reservation "arcv_rhx100_imul_fused" 4
+  (and (eq_attr "tune" "arcv_rhx100")
+       (eq_attr "type" "imul_fused"))
+  "(arcv_rhx100_issueA_fuse0 + arcv_rhx100_issueA_fuse1
+    + arcv_rhx100_ALU_A_fuse0_early + arcv_rhx100_ALU_A_fuse1_early
+    + arcv_rhx100_MPY32), nothing*3")
+
+;; A fused ALU pair (type alu_fused): default latency 1.  It reserves both
+;; fuse slots and both early ALU units of a single side, either A or B.
+(define_insn_reservation "arcv_rhx100_alu_fused" 1
+  (and (eq_attr "tune" "arcv_rhx100")
+       (eq_attr "type" "alu_fused"))
+  "(arcv_rhx100_issueA_fuse0 + arcv_rhx100_issueA_fuse1
+    + arcv_rhx100_ALU_A_fuse0_early + arcv_rhx100_ALU_A_fuse1_early)
+   | (arcv_rhx100_issueB_fuse0 + arcv_rhx100_issueB_fuse1
+      + arcv_rhx100_ALU_B_fuse0_early + arcv_rhx100_ALU_B_fuse1_early)")
+
 (define_insn_reservation "arcv_rhx100_jmp_insn" 1
   (and (eq_attr "tune" "arcv_rhx100")
        (eq_attr "type" "branch,jump,call,jalr,ret,trap"))
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 4cda08848f6..10537df7f9a 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -218,6 +218,12 @@
                                      (zero_extract "srliw")])
 (define_code_attr extract_shift [(sign_extract "ashiftrt")
                                 (zero_extract "lshiftrt")])
+;; Maps each extraction code to a C boolean literal so that an insn
+;; condition can tell the two apart: "true" for zero_extract and "false"
+;; for sign_extract.
+(define_code_attr is_zero_extract
+  [(sign_extract "false")
+   (zero_extract "true")])
 
 ;; This code iterator allows the two right shift instructions to be
 ;; generated from the same template.
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 7e6629fef6c..b679f3f604b 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4469,7 +4469,8 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
        }
       gcc_fallthrough ();
     case SIGN_EXTRACT:
-      if (TARGET_XTHEADBB && outer_code == SET
+      if ((TARGET_ARCV_RHX100 || TARGET_XTHEADBB)
+         && outer_code == SET
          && CONST_INT_P (XEXP (x, 1))
          && CONST_INT_P (XEXP (x, 2)))
        {
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 2633aebf57b..32ea4b464e8 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3073,6 +3073,7 @@
 ;; * Single-bit extraction (SFB)
 ;; * Extraction instruction th.ext(u) (XTheadBb)
 ;; * lshrsi3_extend_2 (see above)
+;; * Zero extraction fusion (ARC-V)
 (define_insn_and_split "*<any_extract:optab><GPR:mode>3"
   [(set (match_operand:GPR 0 "register_operand" "=r")
         (any_extract:GPR
@@ -3085,6 +3086,8 @@
      && (INTVAL (operands[2]) == 1))
    && !TARGET_XTHEADBB
    && !TARGET_XANDESPERF
+   && !(TARGET_ARCV_RHX100
+       && <any_extract:is_zero_extract>)
    && !(TARGET_64BIT
         && (INTVAL (operands[3]) > 0)
         && (INTVAL (operands[2]) + INTVAL (operands[3]) == 32))"
@@ -4525,8 +4528,56 @@
          (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand"))
                   (sign_extend:SI (match_operand:HI 2 "register_operand")))
          (match_operand:SI 3 "register_operand")))]
-  "TARGET_XTHEADMAC"
-)
+  "TARGET_XTHEADMAC || (TARGET_ARCV_RHX100
+                       && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL))"
+{
+  if (TARGET_ARCV_RHX100)
+    {
+      rtx tmp0 = gen_reg_rtx (SImode), tmp1 = gen_reg_rtx (SImode);
+      emit_insn (gen_extendhisi2 (tmp0, operands[1]));
+      emit_insn (gen_extendhisi2 (tmp1, operands[2]));
+
+      if (TARGET_64BIT)
+       {
+         rtx op0 = gen_reg_rtx (DImode);
+         emit_insn (gen_madd_fused_extended (op0, tmp0, tmp1, operands[3]));
+         op0 = gen_lowpart (SImode, op0);
+         SUBREG_PROMOTED_VAR_P (op0) = 1;
+         SUBREG_PROMOTED_SET (op0, SRP_SIGNED);
+         emit_move_insn (operands[0], op0);
+       }
+      else
+       {
+         emit_insn (gen_madd_fused (operands[0], tmp0, tmp1, operands[3]));
+       }
+
+      DONE;
+    }
+})
+
+;; Unsigned 16x16->32 multiply-accumulate for RHX-100 on RV32:
+;; operand 0 = zero_extend (operand 1) * zero_extend (operand 2) + operand 3.
+;; Both halfword inputs are widened into fresh SImode pseudos and the
+;; multiply-add itself is emitted as a single madd_fused insn, which is
+;; split into a multiply and an add only after reload.
+(define_expand "umaddhisi4"
+  [(set (match_operand:SI 0 "register_operand")
+       (plus:SI
+         (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand"))
+                  (zero_extend:SI (match_operand:HI 2 "register_operand")))
+         (match_operand:SI 3 "register_operand")))]
+  "TARGET_ARCV_RHX100
+   && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)"
+{
+  /* The condition above excludes TARGET_64BIT, so only the SImode
+     madd_fused pattern can be needed here.  */
+  rtx tmp0 = gen_reg_rtx (SImode);
+  rtx tmp1 = gen_reg_rtx (SImode);
+  emit_insn (gen_zero_extendhisi2 (tmp0, operands[1]));
+  emit_insn (gen_zero_extendhisi2 (tmp1, operands[2]));
+  emit_insn (gen_madd_fused (operands[0], tmp0, tmp1, operands[3]));
+  DONE;
+})
 
 (define_expand "msubhisi4"
   [(set (match_operand:SI 0 "register_operand")
@@ -4537,6 +4594,104 @@
   "TARGET_XTHEADMAC"
 )
 
+;; operand 0 = operand 1 * operand 2 + operand 3 in SImode, for RHX-100 on
+;; RV32.  Kept as a single insn until after reload and then split into
+;; mulsi3 followed by addsi3.
+;; Alternative 0 earlyclobbers the destination, so the product can be formed
+;; in it directly.  Alternative 1 ties the destination to the addend
+;; (operand 3), so the product is formed in the scratch (operand 4).
+(define_insn_and_split "madd_fused"
+  [(set (match_operand:SI 0 "register_operand" "=&r,r")
+     (plus:SI
+       (mult:SI (match_operand:SI 1 "register_operand" "r,r")
+                (match_operand:SI 2 "register_operand" "r,r"))
+       (match_operand:SI 3 "register_operand" "r,?0")))
+    (clobber (match_scratch:SI 4 "=&r,&r"))]
+  "TARGET_ARCV_RHX100
+   && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  /* The product goes to the scratch when the destination is also the
+     addend, and straight to the destination otherwise.  */
+  bool dest_is_addend = REGNO (operands[0]) == REGNO (operands[3]);
+  rtx product = dest_is_addend ? operands[4] : operands[0];
+
+  emit_insn (gen_mulsi3 (product, operands[1], operands[2]));
+  if (dest_is_addend)
+    emit_insn (gen_addsi3 (operands[0], operands[3], product));
+  else
+    emit_insn (gen_addsi3 (operands[0], product, operands[3]));
+  DONE;
+}
+  [(set_attr "type" "imul_fused")])
+
+;; RV64 counterpart of madd_fused: the SImode multiply-add is sign-extended
+;; into a DImode destination.  Split after reload into mulsi3_extended
+;; followed by addsi3_extended; each of those sets a DImode register from
+;; SImode sources, so the split has to present every register in the mode
+;; the generator expects.
+(define_insn_and_split "madd_fused_extended"
+  [(set (match_operand:DI 0 "register_operand" "=&r,r")
+     (sign_extend:DI
+      (plus:SI
+       (mult:SI (match_operand:SI 1 "register_operand" "r,r")
+                (match_operand:SI 2 "register_operand" "r,r"))
+       (match_operand:SI 3 "register_operand" "r,?0"))))
+    (clobber (match_scratch:SI 4 "=&r,&r"))]
+  "TARGET_ARCV_RHX100
+   && TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  if (REGNO (operands[0]) == REGNO (operands[3]))
+    {
+      /* The destination is also the addend: form the product in the
+         scratch, written as a DImode register.  */
+      rtx product = gen_rtx_REG (DImode, REGNO (operands[4]));
+      emit_insn (gen_mulsi3_extended (product, operands[1], operands[2]));
+      emit_insn (gen_addsi3_extended (operands[0], operands[3], operands[4]));
+    }
+  else
+    {
+      /* Form the product in the destination and read it back through its
+         SImode low part.  */
+      rtx product = gen_lowpart (SImode, operands[0]);
+      emit_insn (gen_mulsi3_extended (operands[0], operands[1], operands[2]));
+      emit_insn (gen_addsi3_extended (operands[0], product, operands[3]));
+    }
+  DONE;
+}
+  [(set_attr "type" "imul_fused")])
+
+;; Zero-extract a bit-field of operand 2 bits starting at bit operand 3 of
+;; operand 1, for RHX-100 on RV32.  Single-bit extracts are not matched here
+;; when TARGET_ZBS.  After reload this splits into a left shift that moves
+;; the top bit of the field to the most significant bit, followed by a
+;; logical right shift that moves the bottom bit of the field to bit 0.
+(define_insn_and_split "*zero_extract_fused"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+                        (match_operand 2 "const_int_operand")
+                        (match_operand 3 "const_int_operand")))]
+  "TARGET_ARCV_RHX100 && !TARGET_64BIT
+     && (INTVAL (operands[2]) > 1 || !TARGET_ZBS)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (ashift:SI   (match_dup 1) (match_dup 2)))
+   (set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 3)))]
+{
+  /* Operands 2 and 3 come in as the field width and start bit; rewrite
+     them into the two shift counts used by the replacement insns.  */
+  int width = INTVAL (operands[2]);
+  int top = INTVAL (operands[3]) + width;
+  operands[2] = GEN_INT (BITS_PER_WORD - top);
+  operands[3] = GEN_INT (BITS_PER_WORD - width);
+}
+  [(set_attr "type" "alu_fused")])
+
 ;; String compare with length insn.
 ;; Argument 0 is the target (result)
 ;; Argument 1 is the source1
diff --git a/gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c b/gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c
new file mode 100644
index 00000000000..cc2a56a2e08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c
@@ -0,0 +1,14 @@
+/* Check that the load of the immediate and the conditional branch that
+   consumes it are emitted back-to-back when tuning for RHX-100.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=arc-v-rhx-100-series" } */
+
+int
+f (int x)
+{
+  begin:
+  if (x <= 3)
+    goto begin;
+}
+
+/* { dg-final { scan-assembler "\\sli\\sa5,3\n\\sble\\sa0,a5,.L\[0-9\]+\n" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c b/gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c
new file mode 100644
index 00000000000..eb8665f576c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c
@@ -0,0 +1,14 @@
+/* Check that each multiply-add for RHX-100 on RV32 is emitted as a mul
+   immediately followed by the add that consumes its result.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target rv32 } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */
+/* { dg-options "-mtune=arc-v-rhx-100-series -march=rv32im -mabi=ilp32" } */
+
+int
+f (int x, int y, int z, int v, int w)
+{
+  return x + y * z + v * w;
+}
+
+/* { dg-final { scan-assembler {\smul\s([ast][0-9]+),a1,a2\n\sadd\s\1,\1,a0\n\smul\sa0,a3,a4\n\sadd\sa0,a0,\1\n} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c b/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c
new file mode 100644
index 00000000000..b471c20ae57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c
@@ -0,0 +1,17 @@
+/* Check that a multi-bit field extract for RHX-100 on RV32 is emitted as an
+   slli immediately followed by an srli, while the single-bit extract still
+   uses the Zbs bexti instruction.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target rv32 } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" "-Oz" "-Os" } } */
+/* { dg-options "-mtune=arc-v-rhx-100-series -march=rv32im_zbs -mabi=ilp32" } */
+
+#define bit_extract(x,start,amt) (((x)>>(start)) & (~(0xffffffff << (amt))))
+
+int
+f (int x)
+{
+  return bit_extract(x,10,14) + bit_extract(x,1,1);
+}
+
+/* { dg-final { scan-assembler {\sslli\s([ast][0-9]+),a0,8\n\ssrli\s([ast][0-9]+),\1,18\n\sbexti\sa0,a0,1.*\n\sadd\sa0,\2,a0.*\n} } } */
-- 
2.34.0

Reply via email to