This patch adds instruction patterns to support fusion of multiply-add and bit
extraction sequences for the Synopsys RHX-100 processor. Each sequence is kept
as a single insn until after reload and is only then split into its two
instructions, which makes it more likely that the pair is emitted back to back
and can be fused.
gcc/ChangeLog:
* config/riscv/arcv-rhx100.md (arcv_rhx100_imul_fused): New reservation.
(arcv_rhx100_alu_fused): New reservation.
* config/riscv/iterators.md (is_zero_extract): New code attribute.
* config/riscv/riscv.cc (riscv_rtx_costs): Reduce cost for zero_extract
and sign_extract for RHX-100.
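* config/riscv/riscv.md: Add imul_fused and alu_fused type attributes.
(*<any_extract:optab><GPR:mode>3): Disable for zero_extract on RHX-100.
(maddhisi4): Also enable for RHX-100 and expand to madd_fused.
(umaddhisi4): New expand.
(madd_fused): New insn_and_split.
(madd_fused_extended): New insn_and_split.
(*zero_extract_fused): New insn_and_split.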
gcc/testsuite/ChangeLog:
* gcc.target/riscv/arcv-fusion-limm-condbr.c: New test.
* gcc.target/riscv/arcv-fusion-madd.c: New test.
* gcc.target/riscv/arcv-fusion-xbfu.c: New test.
Co-authored-by: Artemiy Volkov <[email protected]>
Co-authored-by: Michiel Derhaeg <[email protected]>
Signed-off-by: Luis Silva <[email protected]>
---
gcc/config/riscv/arcv-rhx100.md | 10 ++
gcc/config/riscv/iterators.md | 2 +
gcc/config/riscv/riscv.cc | 3 +-
gcc/config/riscv/riscv.md | 135 +++++++++++++++++-
.../riscv/arcv-fusion-limm-condbr.c | 12 ++
.../gcc.target/riscv/arcv-fusion-madd.c | 12 ++
.../gcc.target/riscv/arcv-fusion-xbfu.c | 14 ++
7 files changed, 185 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c
create mode 100644 gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c
create mode 100644 gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c
diff --git a/gcc/config/riscv/arcv-rhx100.md b/gcc/config/riscv/arcv-rhx100.md
index c0631a17a28..7cbabac29a5 100644
--- a/gcc/config/riscv/arcv-rhx100.md
+++ b/gcc/config/riscv/arcv-rhx100.md
@@ -42,6 +42,16 @@
condmove,mvpair,zicond,cpop,clmul"))
"((arcv_rhx100_issueA_fuse0 + arcv_rhx100_ALU_A_fuse0_early) |
(arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse1_early)) |
((arcv_rhx100_issueB_fuse0 + arcv_rhx100_ALU_B_fuse0_early) |
(arcv_rhx100_issueB_fuse1 + arcv_rhx100_ALU_B_fuse1_early))")
+;; Reservation for insns of type "imul_fused" when tuning for arcv_rhx100.
+;; The first cycle claims both fuse slots of issueA, both early ALU_A units
+;; and MPY32 together; the remaining three cycles of the latency of 4
+;; reserve nothing.
+(define_insn_reservation "arcv_rhx100_imul_fused" 4
+ (and (eq_attr "tune" "arcv_rhx100")
+ (eq_attr "type" "imul_fused"))
+ "(arcv_rhx100_issueA_fuse0 + arcv_rhx100_issueA_fuse1 +
arcv_rhx100_ALU_A_fuse0_early + arcv_rhx100_ALU_A_fuse1_early +
arcv_rhx100_MPY32), nothing*3")
+
+;; Reservation for insns of type "alu_fused" when tuning for arcv_rhx100.
+;; The insn claims both fuse slots and both early ALU units of one side,
+;; either the A units or the B units, for a single cycle (latency 1).
+(define_insn_reservation "arcv_rhx100_alu_fused" 1
+ (and (eq_attr "tune" "arcv_rhx100")
+ (eq_attr "type" "alu_fused"))
+ "(arcv_rhx100_issueA_fuse0 + arcv_rhx100_issueA_fuse1 +
arcv_rhx100_ALU_A_fuse0_early + arcv_rhx100_ALU_A_fuse1_early) |
(arcv_rhx100_issueB_fuse0 + arcv_rhx100_issueB_fuse1 +
arcv_rhx100_ALU_B_fuse0_early + arcv_rhx100_ALU_B_fuse1_early)")
+
(define_insn_reservation "arcv_rhx100_jmp_insn" 1
(and (eq_attr "tune" "arcv_rhx100")
(eq_attr "type" "branch,jump,call,jalr,ret,trap"))
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 4cda08848f6..10537df7f9a 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -218,6 +218,8 @@
(zero_extract "srliw")])
(define_code_attr extract_shift [(sign_extract "ashiftrt")
(zero_extract "lshiftrt")])
+;; Expands to "true" for zero_extract and "false" for sign_extract, so it
+;; can be used directly as a C expression inside an insn condition.
+(define_code_attr is_zero_extract [(sign_extract "false")
+ (zero_extract "true")])
;; This code iterator allows the two right shift instructions to be
;; generated from the same template.
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 7e6629fef6c..b679f3f604b 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4469,7 +4469,8 @@ riscv_rtx_costs (rtx x, machine_mode mode, int
outer_code, int opno ATTRIBUTE_UN
}
gcc_fallthrough ();
case SIGN_EXTRACT:
- if (TARGET_XTHEADBB && outer_code == SET
+ if ((TARGET_ARCV_RHX100 || TARGET_XTHEADBB)
+ && outer_code == SET
&& CONST_INT_P (XEXP (x, 1))
&& CONST_INT_P (XEXP (x, 2)))
{
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 2633aebf57b..32ea4b464e8 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3073,6 +3073,7 @@
;; * Single-bit extraction (SFB)
;; * Extraction instruction th.ext(u) (XTheadBb)
;; * lshrsi3_extend_2 (see above)
+;; * Zero extraction fusion (ARC-V RHX-100 on RV32, *zero_extract_fused)
(define_insn_and_split "*<any_extract:optab><GPR:mode>3"
[(set (match_operand:GPR 0 "register_operand" "=r")
(any_extract:GPR
@@ -3085,6 +3086,8 @@
&& (INTVAL (operands[2]) == 1))
&& !TARGET_XTHEADBB
&& !TARGET_XANDESPERF
+ && !(TARGET_ARCV_RHX100 && !TARGET_64BIT
+ && <any_extract:is_zero_extract>)
&& !(TARGET_64BIT
&& (INTVAL (operands[3]) > 0)
&& (INTVAL (operands[2]) + INTVAL (operands[3]) == 32))"
@@ -4525,8 +4528,62 @@
(mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand"))
(sign_extend:SI (match_operand:HI 2 "register_operand")))
(match_operand:SI 3 "register_operand")))]
- "TARGET_XTHEADMAC"
-)
+  "TARGET_XTHEADMAC || (TARGET_ARCV_RHX100
+			&& !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL))"
+{
+  /* With XTheadMac the expand template is emitted unchanged, as it was
+     before the RHX-100 alternative was added to the condition.  Without
+     it, the condition above guarantees RHX-100 tuning on RV32 with a
+     multiplier, so sign-extend the halfword inputs to SImode and emit
+     madd_fused.  No RV64 path is needed: the condition excludes it.  */
+  if (!TARGET_XTHEADMAC)
+    {
+      rtx tmp0 = gen_reg_rtx (SImode), tmp1 = gen_reg_rtx (SImode);
+      emit_insn (gen_extendhisi2 (tmp0, operands[1]));
+      emit_insn (gen_extendhisi2 (tmp1, operands[2]));
+      emit_insn (gen_madd_fused (operands[0], tmp0, tmp1, operands[3]));
+      DONE;
+    }
+})
+
+;; Unsigned halfword multiply-accumulate when tuning for RHX-100 on RV32:
+;; zero-extend both halfword inputs to SImode and emit madd_fused.
+(define_expand "umaddhisi4"
+  [(set (match_operand:SI 0 "register_operand")
+	(plus:SI
+	  (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand"))
+		   (zero_extend:SI (match_operand:HI 2 "register_operand")))
+	  (match_operand:SI 3 "register_operand")))]
+  "TARGET_ARCV_RHX100
+   && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)"
+{
+  /* TARGET_64BIT is excluded by the condition, so only the SImode
+     madd_fused form can ever be needed here.  */
+  rtx tmp0 = gen_reg_rtx (SImode), tmp1 = gen_reg_rtx (SImode);
+  emit_insn (gen_zero_extendhisi2 (tmp0, operands[1]));
+  emit_insn (gen_zero_extendhisi2 (tmp1, operands[2]));
+  emit_insn (gen_madd_fused (operands[0], tmp0, tmp1, operands[3]));
+  DONE;
+})
(define_expand "msubhisi4"
[(set (match_operand:SI 0 "register_operand")
@@ -4537,6 +4594,80 @@
"TARGET_XTHEADMAC"
)
+;; SImode multiply-add kept as a single insn until after reload and then
+;; split into a multiply followed by an add.  In the second alternative the
+;; addend is tied to the destination register.
+(define_insn_and_split "madd_fused"
+  [(set (match_operand:SI 0 "register_operand" "=&r,r")
+	(plus:SI
+	  (mult:SI (match_operand:SI 1 "register_operand" "r,r")
+		   (match_operand:SI 2 "register_operand" "r,r"))
+	  (match_operand:SI 3 "register_operand" "r,?0")))
+   (clobber (match_scratch:SI 4 "=&r,&r"))]
+  "TARGET_ARCV_RHX100
+   && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "{
+    /* When the addend shares the destination register the product must
+       not overwrite it, so it is computed into the scratch; otherwise the
+       destination itself receives the product.  */
+    bool tied = REGNO (operands[0]) == REGNO (operands[3]);
+    rtx product = tied ? operands[4] : operands[0];
+    rtx lhs = tied ? operands[3] : product;
+    rtx rhs = tied ? product : operands[3];
+
+    emit_insn (gen_mulsi3 (product, operands[1], operands[2]));
+    emit_insn (gen_addsi3 (operands[0], lhs, rhs));
+    DONE;
+  }"
+  [(set_attr "type" "imul_fused")])
+
+;; RV64 form of madd_fused: the SImode multiply-add result is sign-extended
+;; into a DImode destination.  After reload the insn is split into
+;; mulsi3_extended followed by addsi3_extended, which are only available
+;; with TARGET_64BIT, hence the condition.
+(define_insn_and_split "madd_fused_extended"
+  [(set (match_operand:DI 0 "register_operand" "=&r,r")
+	(sign_extend:DI
+	  (plus:SI
+	    (mult:SI (match_operand:SI 1 "register_operand" "r,r")
+		     (match_operand:SI 2 "register_operand" "r,r"))
+	    (match_operand:SI 3 "register_operand" "r,?0"))))
+   (clobber (match_scratch:SI 4 "=&r,&r"))]
+  "TARGET_ARCV_RHX100
+   && TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "{
+    /* mulsi3_extended and addsi3_extended set a DImode register from
+       SImode inputs, so each register is passed in the mode its slot
+       expects.  When the addend is tied to the destination the product
+       goes through the scratch; otherwise the low part of the destination
+       holds it.  */
+    bool tied = REGNO (operands[0]) == REGNO (operands[3]);
+    rtx product = tied ? operands[4] : gen_lowpart (SImode, operands[0]);
+    rtx product_di = gen_rtx_REG (DImode, REGNO (product));
+    rtx lhs = tied ? operands[3] : product;
+    rtx rhs = tied ? product : operands[3];
+
+    emit_insn (gen_mulsi3_extended (product_di, operands[1], operands[2]));
+    emit_insn (gen_addsi3_extended (operands[0], lhs, rhs));
+    DONE;
+  }"
+  [(set_attr "type" "imul_fused")])
+
+;; Zero-extract of a constant bit-field on RV32, kept as one insn until
+;; after reload and then split into a left shift that drops the bits above
+;; the field followed by a logical right shift that drops the bits below it.
+(define_insn_and_split "*zero_extract_fused"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+			 (match_operand 2 "const_int_operand")
+			 (match_operand 3 "const_int_operand")))]
+  "TARGET_ARCV_RHX100 && !TARGET_64BIT
+   && (INTVAL (operands[2]) > 1 || !TARGET_ZBS)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))
+   (set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 3)))]
+  "{
+    /* On entry operand 2 is the field width and operand 3 its starting
+       bit; both are replaced by the two shift counts.  */
+    int width = INTVAL (operands[2]);
+    int lsb = INTVAL (operands[3]);
+    operands[2] = GEN_INT (BITS_PER_WORD - (lsb + width));
+    operands[3] = GEN_INT (BITS_PER_WORD - width);
+  }"
+  [(set_attr "type" "alu_fused")])
+
;; String compare with length insn.
;; Argument 0 is the target (result)
;; Argument 1 is the source1
diff --git a/gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c
b/gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c
new file mode 100644
index 00000000000..cc2a56a2e08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=arc-v-rhx-100-series" } */
+
+/* The load of the constant 3 and the conditional branch that compares
+   against it are expected on consecutive lines of the assembly.  */
+int
+f (int x)
+{
+ begin:
+ if (x <= 3)
+ goto begin;
+}
+
+/* { dg-final { scan-assembler "\\sli\\sa5,3\n\\sble\\sa0,a5,.L\[0-9\]+\n" } }
*/
diff --git a/gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c
b/gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c
new file mode 100644
index 00000000000..eb8665f576c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv32 } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */
+/* { dg-options "-mtune=arc-v-rhx-100-series -march=rv32im -mabi=ilp32" } */
+
+/* Two multiply-adds: each mul is expected to be followed immediately by
+   the add that consumes its result.  */
+int
+f (int x, int y, int z, int v, int w)
+{
+ return x + y * z + v * w;
+}
+
+/* { dg-final { scan-assembler
{\smul\s([ast][0-9]+),a1,a2\n\sadd\s\1,\1,a0\n\smul\sa0,a3,a4\n\sadd\sa0,a0,\1\n}
} } */
diff --git a/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c
b/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c
new file mode 100644
index 00000000000..b471c20ae57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv32 } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" "-Oz" "-Os" } } */
+/* { dg-options "-mtune=arc-v-rhx-100-series -march=rv32im_zbs -mabi=ilp32" }
*/
+
+/* Extract AMT bits of X starting at bit START.  */
+#define bit_extract(x,start,amt) (((x)>>(start)) & (~(0xffffffff << (amt))))
+
+/* The 14-bit field at bit 10 is expected as slli by 8 followed directly by
+   srli by 18 on its result; the single-bit field at bit 1 is expected as
+   bexti.  */
+int
+f (int x)
+{
+ return bit_extract(x,10,14) + bit_extract(x,1,1);
+}
+
+/* { dg-final { scan-assembler
{\sslli\s([ast][0-9]+),a0,8\n\ssrli\s([ast][0-9]+),\1,18\n\sbexti\sa0,a0,1.*\n\sadd\sa0,\2,a0.*\n}
} } */
--
2.34.0