On 11/3/22 18:53, Kevin Lee wrote:
This is the identical patch with 
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/604814.html, but with 
the correct plaintext format.

The loop still seems a bit odd which may point to further improvements
that could be made to this patch.  Consider this fragment of the loop:
Thank you for the review Jeff! I am currently looking into this issue
in a different patch. I'll come back with some improvement.
gcc/ChangeLog:
    Jim Wilson <jim.wilson....@gmail.com>
    Michael Collison <colli...@rivosinc.com>
    Kevin Lee <kev...@rivosinc.com>
* config/riscv/predicates.md (const_lui_operand): New Predicate.
        (add_operand): Ditto.
        (reg_or_const_int_operand): Ditto.
        * config/riscv/riscv-protos.h (riscv_eliminable_reg): New
    function.
        * config/riscv/riscv-selftests.cc (calculate_x_in_sequence):
    Consider Parallel insns.
        * config/riscv/riscv.cc (riscv_eliminable_reg): New function.
        (riscv_adjust_libcall_cfi_prologue): Use gen_rtx_SET and
    gen_rtx_fmt_ee instead of gen_add3_insn.
        (riscv_adjust_libcall_cfi_epilogue): Ditto.
        * config/riscv/riscv.md (addsi3): Remove.
        (add<mode>3): New instruction for large stack frame
    optimization.
        (add<mode>3_internal): Ditto.
        (adddi3): Remove.
        (add<mode>3_internal2): New instruction for insns generated in
    the prologue and epilogue pass.
---

diff --git a/gcc/config/riscv/riscv-selftests.cc 
b/gcc/config/riscv/riscv-selftests.cc
index 636874ebc0f..50457db708e 100644
--- a/gcc/config/riscv/riscv-selftests.cc
+++ b/gcc/config/riscv/riscv-selftests.cc
@@ -116,6 +116,9 @@ calculate_x_in_sequence (rtx reg)
        rtx pat = PATTERN (insn);
        rtx dest = SET_DEST (pat);
+ if (GET_CODE (pat) == PARALLEL)
+       dest = SET_DEST (XVECEXP (pat, 0, 0));

So this assumes you've got a parallel where the first vector is a SET.  That may well be true, but it's probably safer to verify with something like


    gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET)


That way we're not surprised in the future if we have more patterns that use PARALLEL, perhaps with the first not being a simple set.


+{
+  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
+                      || REGNO (x) == ARG_POINTER_REGNUM
+                      || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
+                          && REGNO (x) <= LAST_VIRTUAL_REGISTER));

Instead I'd write it as


  return (REG_P (x)
          && (REGNO (x) == FRAME_POINTER_REGNUM
              || REGNO (x) == ARG_POINTER_REGNUM
              || (REGNO (x) >= FIRST_VIRUTAL_REGISTER
                  && REGNO (x) <= LAST_VIRTUAL_REGISTER)));


That's just the style most GCC folks are used to reading.


@@ -4887,8 +4897,9 @@ riscv_adjust_libcall_cfi_prologue ()
        }
/* Debug info for adjust sp. */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-                                stack_pointer_rtx, GEN_INT (-saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+                                   gen_rtx_fmt_ee (PLUS, GET_MODE 
(stack_pointer_rtx),
+                                                 stack_pointer_rtx, GEN_INT 
(saved_size)));
    dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
                          dwarf);
    return dwarf;
@@ -4990,8 +5001,9 @@ riscv_adjust_libcall_cfi_epilogue ()
    int saved_size = cfun->machine->frame.save_libcall_adjustment;
/* Debug info for adjust sp. */
-  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
-                                stack_pointer_rtx, GEN_INT (saved_size));
+  adjust_sp_rtx = gen_rtx_SET (stack_pointer_rtx,
+                                   gen_rtx_fmt_ee (PLUS, GET_MODE 
(stack_pointer_rtx),
+                                                 stack_pointer_rtx, GEN_INT 
(saved_size)));
    dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
                          dwarf);

I think this duplicates a change from Phillip & his team. This as to fix ICEs in the dwarf2 CFI generator, right?  Please double check as remove if it does duplicate a change from Philipp & his team.



+
+(define_insn_and_split "add<mode>3_internal"
+  [(set (match_operand:GPR          0 "register_operand" "=r,r,&r,!&r")
+       (plus:GPR (match_operand:GPR 1 "register_operand" " %r,r,r,0")
+                 (match_operand:GPR 2 "add_operand"      " r,I,L,L")))
+  (clobber (match_scratch:GPR 3 "=X,X,X,&r"))]
+  ""
+{
+  if ((which_alternative == 2) || (which_alternative == 3))
+    return "#";
+
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    return "add%i2w\t%0,%1,%2";
+  else
+    return "add%i2\t%0,%1,%2";
+}
+  "&& reload_completed && const_lui_operand (operands[2], <MODE>mode)"
+  [(const_int 0)]
+{
+  if (REGNO (operands[0]) != REGNO (operands[1]))
+    {
+      emit_insn (gen_mov<mode> (operands[0], operands[2]));
+      emit_insn (gen_add<mode>3_internal (operands[0], operands[0], 
operands[1]));
+    }
+  else
+    {
+      emit_insn (gen_mov<mode> (operands[3], operands[2]));
+      emit_insn (gen_add<mode>3_internal (operands[0], operands[0], 
operands[3]));
+    }
+  DONE;
+}
    [(set_attr "type" "arith")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])

So I think it was Kito that was concerned about potential performance impacts, particularly with the clobber expression.      Probably the best thing I can think of to minimize the potential for codegen changes would be to leave your existing add<mode>3 pattern alone.


Instead define an addptr pattern.  That one is special in that it's only used by LRA.  So the potential for impacting other code is minimized.


Rather than checking REGNO (operands[0]) != REGNO (operands[1]) use reg_overlap_mentioned_p instead.  The only notable difference for this code is that reg_overlap_mentioned_p will work with modes that cover multiple hard regs as well as subregs.

Jeff

Reply via email to