This patch implements the TARGET_SCHED_FUSION_PRIORITY hook for the Synopsys
RHX-100 processor to improve instruction scheduling by prioritizing fusible
memory operations.

To take better advantage of double load/store fusion, make use of the
sched_fusion pass, which assigns "fusion priorities" to load/store
instructions and schedules operations on adjacent addresses next to each
other.  This maximizes the probability that loads/stores are fused with
each other rather than with other instructions.
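
For example, for a function like the following (illustrative only, not a
test case included with this patch, and assuming word-sized loads are
fusible on the RHX-100):

  int
  f (int *p)
  {
    return p[0] + p[1];
  }

the two word loads share a base register and sit at adjacent offsets, so
they receive the same fusion priority and tend to be scheduled back to
back (lower offset first), where the pair can then be fused into a double
load.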

gcc/ChangeLog:

        * config/riscv/arcv.cc (arcv_fusion_load_store): New function.
        (arcv_sched_fusion_priority): New function.
        * config/riscv/riscv-protos.h (arcv_sched_fusion_priority): Declare.
        * config/riscv/riscv.cc (riscv_sched_fusion_priority): New function.
        (TARGET_SCHED_FUSION_PRIORITY): Define.

Co-authored-by: Artemiy Volkov <[email protected]>
Co-authored-by: Michiel Derhaeg <[email protected]>
Signed-off-by: Luis Silva <[email protected]>
---
 gcc/config/riscv/arcv.cc        | 105 ++++++++++++++++++++++++++++++++
 gcc/config/riscv/riscv-protos.h |   1 +
 gcc/config/riscv/riscv.cc       |  19 ++++++
 3 files changed, 125 insertions(+)

diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc
index f5d5fb6cd22..af237660226 100644
--- a/gcc/config/riscv/arcv.cc
+++ b/gcc/config/riscv/arcv.cc
@@ -497,3 +497,108 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
 
   return false;
 }
+
+/* If INSN is a load or store whose address has the form [base+offset],
+   extract the two parts into BASE and OFFSET, set MODE to the mode of
+   the memory access, and set IS_LOAD to TRUE if it is a load.  Return
+   TRUE if INSN is such an instruction, otherwise return FALSE.  */
+
+static bool
+arcv_fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset,
+                       machine_mode *mode, bool *is_load)
+{
+  rtx x, dest, src;
+
+  gcc_assert (INSN_P (insn));
+  x = PATTERN (insn);
+  if (GET_CODE (x) != SET)
+    return false;
+
+  src = SET_SRC (x);
+  dest = SET_DEST (x);
+
+  if ((GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
+      && MEM_P (XEXP (src, 0)))
+    src = XEXP (src, 0);
+
+  if (REG_P (src) && MEM_P (dest))
+    {
+      *is_load = false;
+      if (extract_base_offset_in_addr (dest, base, offset))
+       *mode = GET_MODE (dest);
+    }
+  else if (MEM_P (src) && REG_P (dest))
+    {
+      *is_load = true;
+      if (extract_base_offset_in_addr (src, base, offset))
+       *mode = GET_MODE (src);
+    }
+  else
+    return false;
+
+  return (*base != NULL_RTX && *offset != NULL_RTX);
+}
+
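+/* Compute the scheduling priority PRI and the fusion priority FUSION_PRI
+   for INSN, given the maximum priority MAX_PRI.  Used to implement the
+   TARGET_SCHED_FUSION_PRIORITY hook.  */
+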
+void
+arcv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
+                            int *pri)
+{
+  rtx base, offset;
+  machine_mode mode = SImode;
+  bool is_load;
+
+  gcc_assert (INSN_P (insn));
+
+  /* Default priority for non-fusible instructions.  */
+  int default_pri = max_pri - 1;
+
+  /* Check if this is a fusible load/store instruction.  */
+  if (!arcv_fusion_load_store (insn, &base, &offset, &mode, &is_load)
+      || !arcv_pair_fusion_mode_allowed_p (mode, is_load))
+    {
+      *pri = default_pri;
+      *fusion_pri = default_pri;
+      return;
+    }
+
+  /* Start with half the default priority to distinguish fusible from
+     non-fusible instructions.  */
+  int priority = default_pri / 2;
+
+  /* Scale priority by access width - narrower accesses get lower priority.
+     HImode: divide by 2, QImode: divide by 4.  This encourages wider
+     accesses to be scheduled together.  */
+  if (mode == HImode)
+    priority /= 2;
+  else if (mode == QImode)
+    priority /= 4;
+
+  /* Factor in base register: instructions with smaller register numbers
+     get higher priority.  The shift by 20 bits ensures this is the most
+     significant component of the priority.  */
+  const int BASE_REG_SHIFT = 20;
+  const int BASE_REG_MASK = 0xff;
+  priority -= ((REGNO (base) & BASE_REG_MASK) << BASE_REG_SHIFT);
+
+  /* Calculate fusion priority: group loads/stores with adjacent addresses
+     into the same scheduling group.  We divide the offset by (mode_size * 2)
+     to group pairs of adjacent accesses, then shift left by 1 to make room
+     for the load/store bit.  */
+  int off_val = (int)(INTVAL (offset));
+  int addr_group = off_val / (GET_MODE_SIZE (mode).to_constant () * 2);
+  *fusion_pri = priority - (addr_group << 1) + is_load;
+
+  /* Factor in the actual offset value: instructions with smaller offsets
+     get higher priority.  We use only the lower 20 bits to avoid overflow.  */
+  const int OFFSET_MASK = 0xfffff;
+  if (off_val >= 0)
+    priority -= (off_val & OFFSET_MASK);
+  else
+    priority += ((-off_val) & OFFSET_MASK);
+
+  *pri = priority;
+}
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 2dcf8a4d697..c3157c084d1 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -841,6 +841,7 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx);
 
 /* Routines implemented in arcv.cc.  */
 extern bool arcv_macro_fusion_pair_p (rtx_insn *, rtx_insn *);
+extern void arcv_sched_fusion_priority (rtx_insn *, int, int *, int *);
 
 extern bool strided_load_broadcast_p (void);
 extern bool riscv_prefer_agnostic_p (void);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 9c0b55eca0d..fabd381e159 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11790,6 +11790,23 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
   return false;
 }
 
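+/* Implement TARGET_SCHED_FUSION_PRIORITY.  */
+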
+static void
+riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
+                            int *pri)
+{
+  if (TARGET_ARCV_RHX100)
+    {
+      arcv_sched_fusion_priority (insn, max_pri, fusion_pri, pri);
+      return;
+    }
+
+  /* Default priority.  */
+  *pri = max_pri - 1;
+  *fusion_pri = max_pri - 1;
+}
+
 /* Adjust the cost/latency of instructions for scheduling.
    For now this is just used to change the latency of vector instructions
    according to their LMUL.  We assume that an insn with LMUL == 8 requires
@@ -16444,6 +16461,8 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
 #define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
 #define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
+#undef TARGET_SCHED_FUSION_PRIORITY
+#define TARGET_SCHED_FUSION_PRIORITY riscv_sched_fusion_priority
 
 #undef TARGET_SCHED_INIT
 #define TARGET_SCHED_INIT riscv_sched_init
-- 
2.34.0
