This patch implements instruction scheduling support for the dual-issue Synopsys
RHX-100 processor by adding scheduler hooks and state tracking for the two
execution pipes.

These hooks address microarchitectural details not covered by the pipeline
description.

The riscv_sched_variable_issue () and riscv_sched_reorder2 () hooks work
together to make sure that:
  - the critical path and the instruction priorities are respected;
  - both pipes are filled (taking advantage of parallel dispatch within the
    microarchitectural constraints);
  - there is as much fusion going on as possible;
  - the existing fusion pairs are not broken up.

riscv_sched_adjust_priority () slightly bumps the priority of load/store pairs.
As a result it becomes easier for riscv_sched_reorder2 () to schedule
instructions in the memory pipe.

gcc/ChangeLog:

        * config/riscv/riscv-protos.h (arcv_sched_init): New declaration.
        (arcv_sched_reorder2): New declaration.
        (arcv_sched_adjust_priority): New declaration.
        (arcv_sched_adjust_cost): New declaration.
        (arcv_can_issue_more_p): New declaration.
        (arcv_sched_variable_issue): New declaration.
        * config/riscv/arcv.cc (struct arcv_sched_state): New struct.
        (arcv_sched_init): New function.
        (arcv_next_fusible_insn): New function.
        (arcv_sched_reorder2): New function.
        (arcv_sched_adjust_priority): New function.
        (arcv_sched_adjust_cost): New function.
        (arcv_can_issue_more_p): New function.
        (arcv_sched_variable_issue): New function.
        * config/riscv/riscv.cc (riscv_fusion_enabled_p): Add forward
        declaration.
        (riscv_sched_init): Add call to arcv_sched_init.
        (riscv_sched_variable_issue): Add ARC-V-specific handling.
        (riscv_sched_adjust_cost): Add ARC-V-specific cost adjustment and fix
        parameter names.
        (riscv_sched_adjust_priority): New function.
        (riscv_sched_reorder2): New function.
        (TARGET_SCHED_ADJUST_PRIORITY): Define hook.
        (TARGET_SCHED_REORDER2): Define hook.
        * config/riscv/riscv.h (TARGET_ARCV_RHX100): New macro.
        * config/riscv/riscv.md (type): Add imul_fused and alu_fused.

Co-authored-by: Artemiy Volkov <[email protected]>
Co-authored-by: Michiel Derhaeg <[email protected]>
Co-authored-by: Alex Turjan <[email protected]>
Signed-off-by: Luis Silva <[email protected]>
---
 gcc/config/riscv/arcv.cc        | 287 ++++++++++++++++++++++++++++++++
 gcc/config/riscv/riscv-protos.h |   6 +
 gcc/config/riscv/riscv.cc       |  58 ++++++-
 gcc/config/riscv/riscv.md       |   2 +-
 4 files changed, 348 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc
index af237660226..df042f9947b 100644
--- a/gcc/config/riscv/arcv.cc
+++ b/gcc/config/riscv/arcv.cc
@@ -49,6 +49,32 @@ along with GCC; see the file COPYING3.  If not see
 #include "sched-int.h"
 #include "tm-constrs.h"
 
+/* Scheduler state tracking for dual-pipe ARCV architectures.  */
+
+struct arcv_sched_state {
+  /* True if the ALU pipe has been scheduled for the current cycle.
+     The ALU pipe handles arithmetic, logical, and other computational
+     instructions.  */
+  int alu_pipe_scheduled_p;
+
+  /* True if pipe B has been scheduled for the current cycle.
+     Pipe B is the second execution pipe, typically used for memory
+     operations (loads/stores) but can also handle other instructions.  */
+  int pipeB_scheduled_p;
+
+  /* The last instruction that was scheduled.  Used to detect fusion
+     opportunities by looking ahead at the next instruction to be
+     scheduled.  */
+  rtx_insn *last_scheduled_insn;
+
+  /* Cached value of how many more instructions can be issued in the
+     current cycle.  Updated as instructions are scheduled and pipes
+     become occupied.  */
+  short cached_can_issue_more;
+};
+
+static struct arcv_sched_state sched_state;
+
 /* Return TRUE if the target microarchitecture supports macro-op
    fusion for two memory operations of mode MODE (the direction
    of transfer is determined by the IS_LOAD parameter).  */
@@ -498,6 +524,218 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
   return false;
 }
 
+/* Initialize ARCV scheduler state at the beginning of scheduling.  */
+
+void
+arcv_sched_init (void)
+{
+  sched_state.last_scheduled_insn = 0;
+}
+
+/* Return the next possible fusible insn.  */
+
+static rtx_insn *
+arcv_next_fusible_insn (rtx_insn *insn)
+{
+  while (insn)
+    {
+      insn = NEXT_INSN (insn);
+
+      if (insn == 0)
+       break;
+
+      if (DEBUG_INSN_P (insn)
+         || NOTE_P (insn))
+       continue;
+
+      if (NOTE_INSN_BASIC_BLOCK_P (insn))
+       return NULL;
+
+      if (GET_CODE (insn) == CODE_LABEL
+         || GET_CODE (insn) == BARRIER
+         || GET_CODE (PATTERN (insn)) == USE)
+       continue;
+
+      if (JUMP_TABLE_DATA_P (insn))
+       return NULL;
+
+      break;
+    }
+
+  return insn;
+}
+
+/* Try to reorder ready queue to promote ARCV fusion opportunities.
+   Returns the number of instructions that can be issued this cycle.  */
+
+int
+arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp)
+{
+  if (sched_fusion)
+    return sched_state.cached_can_issue_more;
+
+  if (!sched_state.cached_can_issue_more)
+    return 0;
+
+  /* Fuse double load/store instances missed by sched_fusion.  */
+  if (!sched_state.pipeB_scheduled_p && sched_state.last_scheduled_insn
+      && ready && *n_readyp > 0
+      && !SCHED_GROUP_P (sched_state.last_scheduled_insn)
+      && (get_attr_type (sched_state.last_scheduled_insn) == TYPE_LOAD
+         || get_attr_type (sched_state.last_scheduled_insn) == TYPE_STORE))
+    {
+      for (int i = 1; i <= *n_readyp; i++)
+       {
+         rtx_insn* next_insn = arcv_next_fusible_insn (ready[*n_readyp - i]);
+         /* Try to fuse the last_scheduled_insn with.  */
+         /* Fuse only with nondebug insn.  */
+         if (NONDEBUG_INSN_P (ready[*n_readyp - i])
+             /* Which have not been already fused.  */
+             && !SCHED_GROUP_P (ready[*n_readyp - i])
+             && (!next_insn || !SCHED_GROUP_P (next_insn))
+             && arcv_macro_fusion_pair_p (sched_state.last_scheduled_insn,
+                                          ready[*n_readyp - i]))
+           {
+             std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
+             SCHED_GROUP_P (ready[*n_readyp - 1]) = 1;
+             sched_state.pipeB_scheduled_p = 1;
+             return sched_state.cached_can_issue_more;
+           }
+       }
+      sched_state.pipeB_scheduled_p = 1;
+    }
+
+  /* Try to fuse a non-memory last_scheduled_insn.  */
+  if ((!sched_state.alu_pipe_scheduled_p || !sched_state.pipeB_scheduled_p)
+      && sched_state.last_scheduled_insn && ready && *n_readyp > 0
+      && !SCHED_GROUP_P (sched_state.last_scheduled_insn)
+      && (get_attr_type (sched_state.last_scheduled_insn) != TYPE_LOAD
+         && get_attr_type (sched_state.last_scheduled_insn) != TYPE_STORE))
+    {
+      for (int i = 1; i <= *n_readyp; i++)
+       {
+         rtx_insn* next_insn = arcv_next_fusible_insn (ready[*n_readyp - i]);
+         if (NONDEBUG_INSN_P (ready[*n_readyp - i])
+             && !SCHED_GROUP_P (ready[*n_readyp - i])
+             && active_insn_p (ready[*n_readyp - i])
+             && (!next_insn || !SCHED_GROUP_P (next_insn))
+             && arcv_macro_fusion_pair_p (sched_state.last_scheduled_insn,
+                                          ready[*n_readyp - i]))
+           {
+             if (GET_CODE (PATTERN (ready[*n_readyp - i])) == USE)
+               continue;
+
+             if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD
+                 || get_attr_type (ready[*n_readyp - i]) == TYPE_STORE)
+             {
+               if (sched_state.pipeB_scheduled_p)
+                 continue;
+               else
+                 sched_state.pipeB_scheduled_p = 1;
+             }
+             else if (!sched_state.alu_pipe_scheduled_p)
+               sched_state.alu_pipe_scheduled_p = 1;
+             else
+               sched_state.pipeB_scheduled_p = 1;
+
+             std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
+             SCHED_GROUP_P (ready[*n_readyp - 1]) = 1;
+             return sched_state.cached_can_issue_more;
+           }
+       }
+      sched_state.alu_pipe_scheduled_p = 1;
+    }
+
+  /* When pipe B is scheduled, we can have no more memops this cycle.  */
+  if (sched_state.pipeB_scheduled_p && *n_readyp > 0
+      && NONDEBUG_INSN_P (ready[*n_readyp - 1])
+      && recog_memoized (ready[*n_readyp - 1]) >= 0
+      && !SCHED_GROUP_P (ready[*n_readyp - 1])
+      && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD
+         || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE))
+  {
+    if (sched_state.alu_pipe_scheduled_p)
+      return 0;
+
+    for (int i = 2; i <= *n_readyp; i++)
+      {
+       rtx_insn* next_insn = arcv_next_fusible_insn (ready[*n_readyp - i]);
+       if ((NONDEBUG_INSN_P (ready[*n_readyp - i])
+            && recog_memoized (ready[*n_readyp - i]) >= 0
+            && get_attr_type (ready[*n_readyp - i]) != TYPE_LOAD
+            && get_attr_type (ready[*n_readyp - i]) != TYPE_STORE
+            && !SCHED_GROUP_P (ready[*n_readyp - i])
+            && (!next_insn || !SCHED_GROUP_P (next_insn)))
+           || (next_insn && NONDEBUG_INSN_P (next_insn)
+               && recog_memoized (next_insn) >= 0
+               && get_attr_type (next_insn) != TYPE_LOAD
+               && get_attr_type (next_insn) != TYPE_STORE))
+         {
+           std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
+           sched_state.alu_pipe_scheduled_p = 1;
+           sched_state.cached_can_issue_more = 1;
+           return 1;
+         }
+      }
+    return 0;
+  }
+
+  /* If all else fails, schedule a single instruction.  */
+  if (ready && *n_readyp > 0
+      && NONDEBUG_INSN_P (ready[*n_readyp - 1])
+      && recog_memoized (ready[*n_readyp - 1]) >= 0)
+  {
+    rtx_insn *insn = ready[*n_readyp - 1];
+    enum attr_type insn_type = get_attr_type (insn);
+
+    /* Memory operations go to pipeB if available.  */
+    if (!sched_state.pipeB_scheduled_p
+       && (insn_type == TYPE_LOAD || insn_type == TYPE_STORE))
+    {
+      sched_state.pipeB_scheduled_p = 1;
+    }
+    /* Non-memory operations go to ALU pipe.  */
+    else if (insn_type != TYPE_LOAD && insn_type != TYPE_STORE)
+    {
+      sched_state.alu_pipe_scheduled_p = 1;
+    }
+  }
+
+  return sched_state.cached_can_issue_more;
+}
+
+int
+arcv_sched_adjust_priority (rtx_insn *insn, int priority)
+{
+  if (DEBUG_INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE
+      || GET_CODE (PATTERN (insn)) == CLOBBER)
+    return priority;
+
+  /* Bump the priority of fused load-store pairs for easier
+     scheduling of the memory pipe.  The specific increase
+     value is determined empirically.  */
+  if (next_insn (insn) && INSN_P (next_insn (insn))
+      && SCHED_GROUP_P (next_insn (insn))
+      && ((get_attr_type (insn) == TYPE_STORE
+          && get_attr_type (next_insn (insn)) == TYPE_STORE)
+        || (get_attr_type (insn) == TYPE_LOAD
+            && get_attr_type (next_insn (insn)) == TYPE_LOAD)))
+    return priority + 1;
+
+  return priority;
+}
+
+/* Adjust scheduling cost for ARCV fusion.  */
+
+int
+arcv_sched_adjust_cost (rtx_insn *insn, int dep_type, int cost)
+{
+  if (dep_type == REG_DEP_ANTI && !SCHED_GROUP_P (insn))
+    return cost + 1;
+
+  return cost;
+}
+
 /* If INSN is a load or store of address in the form of [base+offset],
    extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
    to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
@@ -598,3 +836,52 @@ arcv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
 
   *pri = priority;
 }
+
+bool
+arcv_can_issue_more_p (int issue_rate, int more)
+{
+  /* Beginning of cycle - reset variables.  */
+  if (more == issue_rate)
+    {
+      sched_state.alu_pipe_scheduled_p = 0;
+      sched_state.pipeB_scheduled_p = 0;
+    }
+
+  if (sched_state.alu_pipe_scheduled_p && sched_state.pipeB_scheduled_p)
+    {
+      sched_state.cached_can_issue_more = 0;
+      return false;
+    }
+
+  sched_state.cached_can_issue_more = more;
+
+  return true;
+}
+
+int
+arcv_sched_variable_issue (rtx_insn *insn, int more)
+{
+  if (next_insn (insn) && INSN_P (next_insn (insn))
+      && SCHED_GROUP_P (next_insn (insn)))
+    {
+      if (get_attr_type (insn) == TYPE_LOAD
+         || get_attr_type (insn) == TYPE_STORE
+         || get_attr_type (next_insn (insn)) == TYPE_LOAD
+         || get_attr_type (next_insn (insn)) == TYPE_STORE)
+       sched_state.pipeB_scheduled_p = 1;
+      else
+       sched_state.alu_pipe_scheduled_p = 1;
+    }
+
+  if (get_attr_type (insn) == TYPE_ALU_FUSED
+      || get_attr_type (insn) == TYPE_IMUL_FUSED)
+    {
+      sched_state.alu_pipe_scheduled_p = 1;
+      more -= 1;
+    }
+
+  sched_state.last_scheduled_insn = insn;
+  sched_state.cached_can_issue_more = more - 1;
+
+  return sched_state.cached_can_issue_more;
+}
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index c3157c084d1..618152f2494 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -842,6 +842,12 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx);
 /* Routines implemented in arcv.cc.  */
 extern bool arcv_macro_fusion_pair_p (rtx_insn *, rtx_insn *);
 extern void arcv_sched_fusion_priority (rtx_insn *, int, int *, int *);
+extern void arcv_sched_init (void);
+extern int arcv_sched_reorder2 (rtx_insn **, int *);
+extern int arcv_sched_adjust_priority (rtx_insn *, int);
+extern int arcv_sched_adjust_cost (rtx_insn *, int, int);
+extern bool arcv_can_issue_more_p (int, int);
+extern int arcv_sched_variable_issue (rtx_insn *, int);
 
 extern bool strided_load_broadcast_p (void);
 extern bool riscv_prefer_agnostic_p (void);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index fabd381e159..7e6629fef6c 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -340,6 +340,9 @@ unsigned riscv_stack_boundary;
 /* Whether in riscv_output_mi_thunk. */
 static bool riscv_in_thunk_func = false;
 
+/* Return true if the instruction fusion described by OP is enabled.  */
+static bool riscv_fusion_enabled_p (enum riscv_fusion_pairs op);
+
 /* If non-zero, this is an offset to be added to SP to redefine the CFA
    when restoring the FP register from the stack.  Only valid when generating
    the epilogue.  */
@@ -10952,12 +10955,20 @@ static void
 riscv_sched_init (FILE *, int, int)
 {
   clear_vconfig ();
+
+  if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV))
+    arcv_sched_init ();
 }
 
 /* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
 static int
 riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
 {
+
+  if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV))
+    if (!arcv_can_issue_more_p (riscv_issue_rate (), more))
+      return 0;
+
   if (DEBUG_INSN_P (insn))
     return more;
 
@@ -11003,6 +11014,9 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
        }
     }
 
+  if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV))
+    return arcv_sched_variable_issue (insn, more);
+
   return more - 1;
 }
 
@@ -11813,17 +11827,21 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
    we currently only perform the adjustment when -madjust-lmul-cost is given.
    */
 static int
-riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
+riscv_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
                         unsigned int)
 {
+  /* Use ARCV-specific cost adjustment for RHX-100.  */
+  if (TARGET_ARCV_RHX100)
+    return arcv_sched_adjust_cost (insn, dep_type, cost);
+
   /* Only do adjustments for the generic out-of-order scheduling model.  */
   if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo)
     return cost;
 
-  if (recog_memoized (insn) < 0)
+  if (recog_memoized (dep_insn) < 0)
     return cost;
 
-  enum attr_type type = get_attr_type (insn);
+  enum attr_type type = get_attr_type (dep_insn);
 
   if (type == TYPE_VFREDO || type == TYPE_VFWREDO)
     {
@@ -11841,7 +11859,7 @@ riscv_sched_adjust_cost (rtx_insn *insn, int cost,
     return cost;
 
   enum riscv_vector::vlmul_type lmul =
-    (riscv_vector::vlmul_type)get_attr_vlmul (insn);
+    (riscv_vector::vlmul_type)get_attr_vlmul (dep_insn);
 
   double factor = 1;
   switch (lmul)
@@ -11895,6 +11913,32 @@ riscv_sched_can_speculate_insn (rtx_insn *insn)
     }
 }
 
+/* Implement TARGET_SCHED_ADJUST_PRIORITY hook.  */
+
+static int
+riscv_sched_adjust_priority (rtx_insn *insn, int priority)
+{
+  if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV))
+    return arcv_sched_adjust_priority (insn, priority);
+
+  return priority;
+}
+
+/* Implement TARGET_SCHED_REORDER2 hook.  */
+
+static int
+riscv_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED,
+                     int verbose ATTRIBUTE_UNUSED,
+                     rtx_insn **ready,
+                     int *n_readyp,
+                     int clock ATTRIBUTE_UNUSED)
+{
+  if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV))
+    return arcv_sched_reorder2 (ready, n_readyp);
+
+  return 0;
+}
+
 /* Auxiliary function to emit RISC-V ELF attribute. */
 static void
 riscv_emit_attribute ()
@@ -16477,6 +16521,12 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
 #undef TARGET_SCHED_CAN_SPECULATE_INSN
 #define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn
 
+#undef  TARGET_SCHED_ADJUST_PRIORITY
+#define TARGET_SCHED_ADJUST_PRIORITY riscv_sched_adjust_priority
+
+#undef  TARGET_SCHED_REORDER2
+#define TARGET_SCHED_REORDER2 riscv_sched_reorder2
+
 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
 #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 58f30c4902b..2633aebf57b 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -519,7 +519,7 @@
   vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,
   vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,
   vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,
-   vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,
+   vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,imul_fused,alu_fused,
    sf_vc,sf_vc_se"
   (cond [(eq_attr "got" "load") (const_string "load")
 
-- 
2.34.0

Reply via email to