This patch implements instruction scheduling support for the dual-issue Synopsys
RHX-100 processor by adding scheduler hooks and state tracking for the two
execution pipes.
These hooks address microarchitectural details not covered by the pipeline
description.
The riscv_sched_variable_issue () and riscv_sched_reorder2 () hooks work
together to make sure that:
- the critical path and the instruction priorities are respected;
- both pipes are filled (taking advantage of parallel dispatch within the
microarchitectural constraints);
- there is as much fusion going on as possible;
- the existing fusion pairs are not broken up.
riscv_sched_adjust_priority () slightly bumps the priority of load/store pairs.
As a result it becomes easier for riscv_sched_reorder2 () to schedule
instructions in the memory pipe.
gcc/ChangeLog:
* config/riscv/riscv-protos.h (arcv_sched_init): New declaration.
(arcv_sched_reorder2): New declaration.
(arcv_sched_adjust_priority): New declaration.
(arcv_sched_adjust_cost): New declaration.
(arcv_can_issue_more_p): New declaration.
(arcv_sched_variable_issue): New declaration.
* config/riscv/arcv.cc (struct arcv_sched_state): New struct.
(arcv_sched_init): New function.
(arcv_next_fusible_insn): New function.
(arcv_sched_reorder2): New function.
(arcv_sched_adjust_priority): New function.
(arcv_sched_adjust_cost): New function.
(arcv_can_issue_more_p): New function.
(arcv_sched_variable_issue): New function.
* config/riscv/riscv.cc (riscv_fusion_enabled_p): Add forward
declaration.
(riscv_sched_init): Add call to arcv_sched_init.
(riscv_sched_variable_issue): Add ARC-V-specific handling.
(riscv_sched_adjust_cost): Add ARC-V-specific cost adjustment and fix
parameter names.
(riscv_sched_adjust_priority): New function.
(riscv_sched_reorder2): New function.
(TARGET_SCHED_ADJUST_PRIORITY): Define hook.
(TARGET_SCHED_REORDER2): Define hook.
* config/riscv/riscv.md (type): Add imul_fused and alu_fused.
Co-authored-by: Artemiy Volkov <[email protected]>
Co-authored-by: Michiel Derhaeg <[email protected]>
Co-authored-by: Alex Turjan <[email protected]>
Signed-off-by: Luis Silva <[email protected]>
---
gcc/config/riscv/arcv.cc | 287 ++++++++++++++++++++++++++++++++
gcc/config/riscv/riscv-protos.h | 6 +
gcc/config/riscv/riscv.cc | 58 ++++++-
gcc/config/riscv/riscv.md | 2 +-
4 files changed, 348 insertions(+), 5 deletions(-)
diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc
index af237660226..df042f9947b 100644
--- a/gcc/config/riscv/arcv.cc
+++ b/gcc/config/riscv/arcv.cc
@@ -49,6 +49,32 @@ along with GCC; see the file COPYING3. If not see
#include "sched-int.h"
#include "tm-constrs.h"
+/* Scheduler state tracking for dual-pipe ARCV architectures. */
+
+struct arcv_sched_state {
+ /* True if the ALU pipe has been scheduled for the current cycle.
+ The ALU pipe handles arithmetic, logical, and other computational
+ instructions. */
+ int alu_pipe_scheduled_p;
+
+ /* True if pipe B has been scheduled for the current cycle.
+ Pipe B is the second execution pipe, typically used for memory
+ operations (loads/stores) but can also handle other instructions. */
+ int pipeB_scheduled_p;
+
+ /* The last instruction that was scheduled. Used to detect fusion
+ opportunities by looking ahead at the next instruction to be
+ scheduled. */
+ rtx_insn *last_scheduled_insn;
+
+ /* Cached value of how many more instructions can be issued in the
+ current cycle. Updated as instructions are scheduled and pipes
+ become occupied. */
+ short cached_can_issue_more;
+};
+
+static struct arcv_sched_state sched_state;
+
/* Return TRUE if the target microarchitecture supports macro-op
fusion for two memory operations of mode MODE (the direction
of transfer is determined by the IS_LOAD parameter). */
@@ -498,6 +524,218 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
return false;
}
+/* Initialize ARCV scheduler state at the beginning of scheduling. */
+
+void
+arcv_sched_init (void)
+{
+ sched_state.last_scheduled_insn = 0;
+}
+
+/* Return the next possible fusible insn. */
+
+static rtx_insn *
+arcv_next_fusible_insn (rtx_insn *insn)
+{
+ while (insn)
+ {
+ insn = NEXT_INSN (insn);
+
+ if (insn == 0)
+ break;
+
+ if (DEBUG_INSN_P (insn)
+ || NOTE_P (insn))
+ continue;
+
+ if (NOTE_INSN_BASIC_BLOCK_P (insn))
+ return NULL;
+
+ if (GET_CODE (insn) == CODE_LABEL
+ || GET_CODE (insn) == BARRIER
+ || GET_CODE (PATTERN (insn)) == USE)
+ continue;
+
+ if (JUMP_TABLE_DATA_P (insn))
+ return NULL;
+
+ break;
+ }
+
+ return insn;
+}
+
+/* Try to reorder ready queue to promote ARCV fusion opportunities.
+ Returns the number of instructions that can be issued this cycle. */
+
+int
+arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp)
+{
+ if (sched_fusion)
+ return sched_state.cached_can_issue_more;
+
+ if (!sched_state.cached_can_issue_more)
+ return 0;
+
+ /* Fuse double load/store instances missed by sched_fusion. */
+ if (!sched_state.pipeB_scheduled_p && sched_state.last_scheduled_insn
+ && ready && *n_readyp > 0
+ && !SCHED_GROUP_P (sched_state.last_scheduled_insn)
+ && (get_attr_type (sched_state.last_scheduled_insn) == TYPE_LOAD
+ || get_attr_type (sched_state.last_scheduled_insn) == TYPE_STORE))
+ {
+ for (int i = 1; i <= *n_readyp; i++)
+ {
+ rtx_insn* next_insn = arcv_next_fusible_insn (ready[*n_readyp - i]);
+	      /* Try to fuse the last_scheduled_insn with a ready insn. */
+ /* Fuse only with nondebug insn. */
+ if (NONDEBUG_INSN_P (ready[*n_readyp - i])
+ /* Which have not been already fused. */
+ && !SCHED_GROUP_P (ready[*n_readyp - i])
+ && (!next_insn || !SCHED_GROUP_P (next_insn))
+ && arcv_macro_fusion_pair_p (sched_state.last_scheduled_insn,
+ ready[*n_readyp - i]))
+ {
+ std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
+ SCHED_GROUP_P (ready[*n_readyp - 1]) = 1;
+ sched_state.pipeB_scheduled_p = 1;
+ return sched_state.cached_can_issue_more;
+ }
+ }
+ sched_state.pipeB_scheduled_p = 1;
+ }
+
+ /* Try to fuse a non-memory last_scheduled_insn. */
+ if ((!sched_state.alu_pipe_scheduled_p || !sched_state.pipeB_scheduled_p)
+ && sched_state.last_scheduled_insn && ready && *n_readyp > 0
+ && !SCHED_GROUP_P (sched_state.last_scheduled_insn)
+ && (get_attr_type (sched_state.last_scheduled_insn) != TYPE_LOAD
+ && get_attr_type (sched_state.last_scheduled_insn) != TYPE_STORE))
+ {
+ for (int i = 1; i <= *n_readyp; i++)
+ {
+ rtx_insn* next_insn = arcv_next_fusible_insn (ready[*n_readyp - i]);
+ if (NONDEBUG_INSN_P (ready[*n_readyp - i])
+ && !SCHED_GROUP_P (ready[*n_readyp - i])
+ && active_insn_p (ready[*n_readyp - i])
+ && (!next_insn || !SCHED_GROUP_P (next_insn))
+ && arcv_macro_fusion_pair_p (sched_state.last_scheduled_insn,
+ ready[*n_readyp - i]))
+ {
+ if (GET_CODE (PATTERN (ready[*n_readyp - i])) == USE)
+ continue;
+
+ if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD
+ || get_attr_type (ready[*n_readyp - i]) == TYPE_STORE)
+ {
+ if (sched_state.pipeB_scheduled_p)
+ continue;
+ else
+ sched_state.pipeB_scheduled_p = 1;
+ }
+ else if (!sched_state.alu_pipe_scheduled_p)
+ sched_state.alu_pipe_scheduled_p = 1;
+ else
+ sched_state.pipeB_scheduled_p = 1;
+
+ std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
+ SCHED_GROUP_P (ready[*n_readyp - 1]) = 1;
+ return sched_state.cached_can_issue_more;
+ }
+ }
+ sched_state.alu_pipe_scheduled_p = 1;
+ }
+
+ /* When pipe B is scheduled, we can have no more memops this cycle. */
+ if (sched_state.pipeB_scheduled_p && *n_readyp > 0
+ && NONDEBUG_INSN_P (ready[*n_readyp - 1])
+ && recog_memoized (ready[*n_readyp - 1]) >= 0
+ && !SCHED_GROUP_P (ready[*n_readyp - 1])
+ && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD
+ || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE))
+ {
+ if (sched_state.alu_pipe_scheduled_p)
+ return 0;
+
+ for (int i = 2; i <= *n_readyp; i++)
+ {
+ rtx_insn* next_insn = arcv_next_fusible_insn (ready[*n_readyp - i]);
+ if ((NONDEBUG_INSN_P (ready[*n_readyp - i])
+ && recog_memoized (ready[*n_readyp - i]) >= 0
+ && get_attr_type (ready[*n_readyp - i]) != TYPE_LOAD
+ && get_attr_type (ready[*n_readyp - i]) != TYPE_STORE
+ && !SCHED_GROUP_P (ready[*n_readyp - i])
+ && (!next_insn || !SCHED_GROUP_P (next_insn)))
+ || (next_insn && NONDEBUG_INSN_P (next_insn)
+ && recog_memoized (next_insn) >= 0
+ && get_attr_type (next_insn) != TYPE_LOAD
+ && get_attr_type (next_insn) != TYPE_STORE))
+ {
+ std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
+ sched_state.alu_pipe_scheduled_p = 1;
+ sched_state.cached_can_issue_more = 1;
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ /* If all else fails, schedule a single instruction. */
+ if (ready && *n_readyp > 0
+ && NONDEBUG_INSN_P (ready[*n_readyp - 1])
+ && recog_memoized (ready[*n_readyp - 1]) >= 0)
+ {
+ rtx_insn *insn = ready[*n_readyp - 1];
+ enum attr_type insn_type = get_attr_type (insn);
+
+ /* Memory operations go to pipeB if available. */
+ if (!sched_state.pipeB_scheduled_p
+ && (insn_type == TYPE_LOAD || insn_type == TYPE_STORE))
+ {
+ sched_state.pipeB_scheduled_p = 1;
+ }
+ /* Non-memory operations go to ALU pipe. */
+ else if (insn_type != TYPE_LOAD && insn_type != TYPE_STORE)
+ {
+ sched_state.alu_pipe_scheduled_p = 1;
+ }
+ }
+
+ return sched_state.cached_can_issue_more;
+}
+
+int
+arcv_sched_adjust_priority (rtx_insn *insn, int priority)
+{
+ if (DEBUG_INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ return priority;
+
+ /* Bump the priority of fused load-store pairs for easier
+ scheduling of the memory pipe. The specific increase
+ value is determined empirically. */
+ if (next_insn (insn) && INSN_P (next_insn (insn))
+ && SCHED_GROUP_P (next_insn (insn))
+ && ((get_attr_type (insn) == TYPE_STORE
+ && get_attr_type (next_insn (insn)) == TYPE_STORE)
+ || (get_attr_type (insn) == TYPE_LOAD
+ && get_attr_type (next_insn (insn)) == TYPE_LOAD)))
+ return priority + 1;
+
+ return priority;
+}
+
+/* Adjust scheduling cost for ARCV fusion. */
+
+int
+arcv_sched_adjust_cost (rtx_insn *insn, int dep_type, int cost)
+{
+ if (dep_type == REG_DEP_ANTI && !SCHED_GROUP_P (insn))
+ return cost + 1;
+
+ return cost;
+}
+
/* If INSN is a load or store of address in the form of [base+offset],
extract the two parts and set to BASE and OFFSET. IS_LOAD is set
to TRUE if it's a load. Return TRUE if INSN is such an instruction,
@@ -598,3 +836,52 @@ arcv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
*pri = priority;
}
+
+bool
+arcv_can_issue_more_p (int issue_rate, int more)
+{
+ /* Beginning of cycle - reset variables. */
+ if (more == issue_rate)
+ {
+ sched_state.alu_pipe_scheduled_p = 0;
+ sched_state.pipeB_scheduled_p = 0;
+ }
+
+ if (sched_state.alu_pipe_scheduled_p && sched_state.pipeB_scheduled_p)
+ {
+ sched_state.cached_can_issue_more = 0;
+ return false;
+ }
+
+ sched_state.cached_can_issue_more = more;
+
+ return true;
+}
+
+int
+arcv_sched_variable_issue (rtx_insn *insn, int more)
+{
+ if (next_insn (insn) && INSN_P (next_insn (insn))
+ && SCHED_GROUP_P (next_insn (insn)))
+ {
+ if (get_attr_type (insn) == TYPE_LOAD
+ || get_attr_type (insn) == TYPE_STORE
+ || get_attr_type (next_insn (insn)) == TYPE_LOAD
+ || get_attr_type (next_insn (insn)) == TYPE_STORE)
+ sched_state.pipeB_scheduled_p = 1;
+ else
+ sched_state.alu_pipe_scheduled_p = 1;
+ }
+
+ if (get_attr_type (insn) == TYPE_ALU_FUSED
+ || get_attr_type (insn) == TYPE_IMUL_FUSED)
+ {
+ sched_state.alu_pipe_scheduled_p = 1;
+ more -= 1;
+ }
+
+ sched_state.last_scheduled_insn = insn;
+ sched_state.cached_can_issue_more = more - 1;
+
+ return sched_state.cached_can_issue_more;
+}
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index c3157c084d1..618152f2494 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -842,6 +842,12 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx);
/* Routines implemented in arcv.cc. */
extern bool arcv_macro_fusion_pair_p (rtx_insn *, rtx_insn *);
extern void arcv_sched_fusion_priority (rtx_insn *, int, int *, int *);
+extern void arcv_sched_init (void);
+extern int arcv_sched_reorder2 (rtx_insn **, int *);
+extern int arcv_sched_adjust_priority (rtx_insn *, int);
+extern int arcv_sched_adjust_cost (rtx_insn *, int, int);
+extern bool arcv_can_issue_more_p (int, int);
+extern int arcv_sched_variable_issue (rtx_insn *, int);
extern bool strided_load_broadcast_p (void);
extern bool riscv_prefer_agnostic_p (void);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index fabd381e159..7e6629fef6c 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -340,6 +340,9 @@ unsigned riscv_stack_boundary;
/* Whether in riscv_output_mi_thunk. */
static bool riscv_in_thunk_func = false;
+/* Return true if the instruction fusion described by OP is enabled. */
+static bool riscv_fusion_enabled_p (enum riscv_fusion_pairs op);
+
/* If non-zero, this is an offset to be added to SP to redefine the CFA
when restoring the FP register from the stack. Only valid when generating
the epilogue. */
@@ -10952,12 +10955,20 @@ static void
riscv_sched_init (FILE *, int, int)
{
clear_vconfig ();
+
+ if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV))
+ arcv_sched_init ();
}
/* Implement TARGET_SCHED_VARIABLE_ISSUE. */
static int
riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
{
+
+ if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV))
+ if (!arcv_can_issue_more_p (riscv_issue_rate (), more))
+ return 0;
+
if (DEBUG_INSN_P (insn))
return more;
@@ -11003,6 +11014,9 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
}
}
+ if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV))
+ return arcv_sched_variable_issue (insn, more);
+
return more - 1;
}
@@ -11813,17 +11827,21 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
we currently only perform the adjustment when -madjust-lmul-cost is given.
*/
static int
-riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
+riscv_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
unsigned int)
{
+ /* Use ARCV-specific cost adjustment for RHX-100. */
+ if (TARGET_ARCV_RHX100)
+ return arcv_sched_adjust_cost (insn, dep_type, cost);
+
/* Only do adjustments for the generic out-of-order scheduling model. */
if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo)
return cost;
- if (recog_memoized (insn) < 0)
+ if (recog_memoized (dep_insn) < 0)
return cost;
- enum attr_type type = get_attr_type (insn);
+ enum attr_type type = get_attr_type (dep_insn);
if (type == TYPE_VFREDO || type == TYPE_VFWREDO)
{
@@ -11841,7 +11859,7 @@ riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
return cost;
enum riscv_vector::vlmul_type lmul =
- (riscv_vector::vlmul_type)get_attr_vlmul (insn);
+ (riscv_vector::vlmul_type)get_attr_vlmul (dep_insn);
double factor = 1;
switch (lmul)
@@ -11895,6 +11913,32 @@ riscv_sched_can_speculate_insn (rtx_insn *insn)
}
}
+/* Implement TARGET_SCHED_ADJUST_PRIORITY hook. */
+
+static int
+riscv_sched_adjust_priority (rtx_insn *insn, int priority)
+{
+ if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV))
+ return arcv_sched_adjust_priority (insn, priority);
+
+ return priority;
+}
+
+/* Implement TARGET_SCHED_REORDER2 hook. */
+
+static int
+riscv_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED,
+ rtx_insn **ready,
+ int *n_readyp,
+ int clock ATTRIBUTE_UNUSED)
+{
+ if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV))
+ return arcv_sched_reorder2 (ready, n_readyp);
+
+ return 0;
+}
+
/* Auxiliary function to emit RISC-V ELF attribute. */
static void
riscv_emit_attribute ()
@@ -16477,6 +16521,12 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn
+#undef TARGET_SCHED_ADJUST_PRIORITY
+#define TARGET_SCHED_ADJUST_PRIORITY riscv_sched_adjust_priority
+
+#undef TARGET_SCHED_REORDER2
+#define TARGET_SCHED_REORDER2 riscv_sched_reorder2
+
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 58f30c4902b..2633aebf57b 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -519,7 +519,7 @@
vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,
vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,
vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,
-   vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,
+   vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,imul_fused,alu_fused,
sf_vc,sf_vc_se"
(cond [(eq_attr "got" "load") (const_string "load")
--
2.34.0