+guojie

在 2023/4/23 下午9:19, Xi Ruoyao 写道:
This commit implements the target macros for shrink wrapping of function
prologues/epilogues shrink wrapping on LoongArch.

Bootstrapped and regtested on loongarch64-linux-gnu.  I don't have an
access to SPEC CPU so I hope the reviewer can perform a benchmark to see
if there is real benefit.

gcc/ChangeLog:

        * config/loongarch/loongarch.h (struct machine_function): Add
        reg_is_wrapped_separately array for register wrapping
        information.
        * config/loongarch/loongarch.cc
        (loongarch_get_separate_components): New function.
        (loongarch_components_for_bb): Likewise.
        (loongarch_disqualify_components): Likewise.
        (loongarch_process_components): Likewise.
        (loongarch_emit_prologue_components): Likewise.
        (loongarch_emit_epilogue_components): Likewise.
        (loongarch_set_handled_components): Likewise.
        (TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS): Define.
        (TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB): Likewise.
        (TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS): Likewise.
        (TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS): Likewise.
        (TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS): Likewise.
        (TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Likewise.
        (loongarch_for_each_saved_reg): Skip registers that are wrapped
        separately.

gcc/testsuite/ChangeLog:

        * gcc.target/loongarch/shrink-wrap.c: New test.
---
  gcc/config/loongarch/loongarch.cc             | 179 +++++++++++++++++-
  gcc/config/loongarch/loongarch.h              |   2 +
  .../gcc.target/loongarch/shrink-wrap.c        |  22 +++
  3 files changed, 200 insertions(+), 3 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/shrink-wrap.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index e523fcb6b7f..d0024237a6a 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "builtins.h"
  #include "rtl-iter.h"
  #include "opts.h"
+#include "function-abi.h"
/* This file should be included last. */
  #include "target-def.h"
@@ -1017,19 +1018,23 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
    for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
      if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
        {
-       loongarch_save_restore_reg (word_mode, regno, offset, fn);
+       if (!cfun->machine->reg_is_wrapped_separately[regno])
+         loongarch_save_restore_reg (word_mode, regno, offset, fn);
+
        offset -= UNITS_PER_WORD;
        }
/* This loop must iterate over the same space as its companion in
       loongarch_compute_frame_info.  */
    offset = cfun->machine->frame.fp_sp_offset - sp_offset;
+  machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
+
    for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
      if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
        {
-       machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
+       if (!cfun->machine->reg_is_wrapped_separately[regno])
+         loongarch_save_restore_reg (word_mode, regno, offset, fn);
- loongarch_save_restore_reg (mode, regno, offset, fn);
        offset -= GET_MODE_SIZE (mode);
        }
  }
@@ -6644,6 +6649,151 @@ loongarch_asan_shadow_offset (void)
    return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
  }
+static sbitmap
+loongarch_get_separate_components (void)
+{
+  HOST_WIDE_INT offset;
+  sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+  bitmap_clear (components);
+  offset = cfun->machine->frame.gp_sp_offset;
+
+  /* The stack should be aligned to 16-bytes boundary, so we can make the use
+     of ldptr instructions.  */
+  gcc_assert (offset % UNITS_PER_WORD == 0);
+
+  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
+      {
+       /* We can wrap general registers saved at [sp, sp + 32768) using the
+          ldptr/stptr instructions.  For large offsets a pseudo register
+          might be needed which cannot be created during the shrink
+          wrapping pass.
+
+          TODO: This may need a revise when we add LA32 as ldptr.w is not
+          guaranteed available by the manual.  */
+       if (offset < 32768)
+         bitmap_set_bit (components, regno);
+
+       offset -= UNITS_PER_WORD;
+      }
+
+  offset = cfun->machine->frame.fp_sp_offset;
+  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
+      {
+       /* We can only wrap FP registers with imm12 offsets.  For large
+          offsets a pseudo register might be needed which cannot be
+          created during the shrink wrapping pass.  */
+       if (IMM12_OPERAND (offset))
+         bitmap_set_bit (components, regno);
+
+       offset -= UNITS_PER_FPREG;
+      }
+
+  /* Don't mess with the hard frame pointer.  */
+  if (frame_pointer_needed)
+    bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
+
+  bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
+
+  return components;
+}
+
+static sbitmap
+loongarch_components_for_bb (basic_block bb)
+{
+  /* Registers are used in a bb if they are in the IN, GEN, or KILL sets.  */
+  auto_bitmap used;
+  bitmap_copy (used, DF_LIVE_IN (bb));
+  bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->gen);
+  bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->kill);
+
+  sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+  bitmap_clear (components);
+
+  function_abi_aggregator callee_abis;
+  rtx_insn *insn;
+  FOR_BB_INSNS (bb, insn)
+    if (CALL_P (insn))
+      callee_abis.note_callee_abi (insn_callee_abi (insn));
+
+  HARD_REG_SET extra_caller_saves =
+    callee_abis.caller_save_regs (*crtl->abi);
+
+  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+    if (!fixed_regs[regno]
+       && !crtl->abi->clobbers_full_reg_p (regno)
+       && (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
+           bitmap_bit_p (used, regno)))
+      bitmap_set_bit (components, regno);
+
+  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+    if (!fixed_regs[regno]
+       && !crtl->abi->clobbers_full_reg_p (regno)
+       && (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
+           bitmap_bit_p (used, regno)))
+      bitmap_set_bit (components, regno);
+
+  return components;
+}
+
+static void
+loongarch_disqualify_components (sbitmap, edge, sbitmap, bool)
+{
+  /* Do nothing.  */
+}
+
+static void
+loongarch_process_components (sbitmap components, loongarch_save_restore_fn fn)
+{
+  HOST_WIDE_INT offset = cfun->machine->frame.gp_sp_offset;
+
+  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
+      {
+       if (bitmap_bit_p (components, regno))
+         loongarch_save_restore_reg (word_mode, regno, offset, fn);
+
+       offset -= UNITS_PER_WORD;
+      }
+
+  offset = cfun->machine->frame.fp_sp_offset;
+  machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
+
+  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
+      {
+       if (bitmap_bit_p (components, regno))
+         loongarch_save_restore_reg (mode, regno, offset, fn);
+
+       offset -= UNITS_PER_FPREG;
+      }
+}
+
+static void
+loongarch_emit_prologue_components (sbitmap components)
+{
+  loongarch_process_components (components, loongarch_save_reg);
+}
+
+static void
+loongarch_emit_epilogue_components (sbitmap components)
+{
+  loongarch_process_components (components, loongarch_restore_reg);
+}
+
+static void
+loongarch_set_handled_components (sbitmap components)
+{
+    for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+      if (bitmap_bit_p (components, regno))
+       cfun->machine->reg_is_wrapped_separately[regno] = true;
+
+    for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+      if (bitmap_bit_p (components, regno))
+       cfun->machine->reg_is_wrapped_separately[regno] = true;
+}
+
  /* Initialize the GCC target structure.  */
  #undef TARGET_ASM_ALIGNED_HI_OP
  #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -6841,6 +6991,29 @@ loongarch_asan_shadow_offset (void)
  #undef TARGET_ASAN_SHADOW_OFFSET
  #define TARGET_ASAN_SHADOW_OFFSET loongarch_asan_shadow_offset
+#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
+#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
+  loongarch_get_separate_components
+
+#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
+#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB loongarch_components_for_bb
+
+#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
+#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
+  loongarch_disqualify_components
+
+#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
+  loongarch_emit_prologue_components
+
+#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
+  loongarch_emit_epilogue_components
+
+#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
+#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
+  loongarch_set_handled_components
+
  struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-loongarch.h"
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index a9eff6a81bd..829acdaa9be 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -1147,6 +1147,8 @@ struct GTY (()) machine_function
    /* The current frame information, calculated by 
loongarch_compute_frame_info.
     */
    struct loongarch_frame_info frame;
+
+  bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];
  };
  #endif
diff --git a/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
new file mode 100644
index 00000000000..f2c867a2769
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fshrink-wrap" } */
+
+/* f(x) should do nothing if x is 0.  */
+/* { dg-final { scan-assembler "bnez\t\\\$r4,\[^\n\]*\n\tjr\t\\\$r1" } } */
+
+void g(void);
+
+void
+f(int x)
+{
+  if (x)
+    {
+      register int s0 asm("s0") = x;
+      register int s1 asm("s1") = x;
+      register int s2 asm("s2") = x;
+      asm("" : : "r"(s0));
+      asm("" : : "r"(s1));
+      asm("" : : "r"(s2));
+      g();
+    }
+}

Reply via email to