https://gcc.gnu.org/g:74d8accaf88f83bfcab1150bf9be5140e7ac0e94

commit r15-1976-g74d8accaf88f83bfcab1150bf9be5140e7ac0e94
Author: Jeff Law <j...@ventanamicro.com>
Date:   Thu Jul 11 12:05:56 2024 -0600

    [to-be-committed,RISC-V] Eliminate unnecessary sign extension after inlined str[n]cmp
    
    This patch eliminates an unnecessary sign extension for scalar inlined
    string comparisons on rv64.
    
    Conceptually this is pretty simple.  Prove all the paths which "return"
    a value from the inlined string comparison already have sign extended
    values.
    
    FINAL_LABEL is the point after the calculation of the return value.  So
    if we have a jump to FINAL_LABEL, we must have a properly extended
    result value at that point.
    
    Second, we're going to arrange in the .md part of the expander to use an
    X mode temporary for the result.  After computing the result we will (if
    necessary) extract the low part of the result using a SUBREG tagged with
    the appropriate SUBREG_PROMOTED_* bits.
    
    With that background, here is how each path to FINAL_LABEL is handled.
    
    We find a jump to FINAL_LABEL in emit_strcmp_scalar_compare_byte.  Since
    we know the result is X mode, we can just emit the subtraction of the
    two chars in X mode and we'll have a properly sign extended result.
    
    There are 4 jumps to final_label in emit_strcmp_scalar.
    
    The first is just returning zero and needs trivial simplification to not
    force the result into SImode.
    
    The second is after calling strcmp in the library.  The ABI mandates
    that the value is sign extended, so there's nothing to do for that case.
    
    The 3rd occurs after a call to
    emit_strcmp_scalar_result_calculation_nonul.  If we dive into that
    routine it needs simplification similar to what we did in
    emit_strcmp_scalar_compare_byte.
    
    The 4th occurs after a call to emit_strcmp_scalar_result_calculation
    which again needs trivial adjustment like we've done in the other routines.
    
    Finally, at the end of expand_strcmp, just store the X mode result
    sitting in SUB to RESULT.
    
    The net of all that is we know every path has its result properly
    extended to X mode.  Standard redundant extension removal will take care
    of the rest.
    
    We've been running this within Ventana for about 6 months, so naturally
    it's been through various QA cycles, dhrystone, spec2017, etc.  It's
    also been through a build/test cycle in my tester.  Waiting on results
    from the pre-commit testing before moving forward.
    
    gcc/
            * config/riscv/riscv-string.cc
            (emit_strcmp_scalar_compare_byte): Set RESULT directly rather
            than using a new temporary.
            (emit_strcmp_scalar_result_calculation_nonul): Likewise.
            (emit_strcmp_scalar_result_calculation): Likewise.
            (riscv_expand_strcmp_scalar): Use CONST0_RTX rather than
            generating a new node.
            (expand_strcmp): Copy directly from SUB to RESULT.
            * config/riscv/riscv.md (cmpstrnsi, cmpstrsi): Pass an X
            mode temporary to the expansion routines.  If necessary
            extract low part of the word to store in final result location.

Diff:
---
 gcc/config/riscv/riscv-string.cc | 15 +++++----------
 gcc/config/riscv/riscv.md        | 28 ++++++++++++++++++++++++----
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 257a514d2901..4736228e6f14 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -140,9 +140,7 @@ static void
 emit_strcmp_scalar_compare_byte (rtx result, rtx data1, rtx data2,
                                 rtx final_label)
 {
-  rtx tmp = gen_reg_rtx (Xmode);
-  do_sub3 (tmp, data1, data2);
-  emit_insn (gen_movsi (result, gen_lowpart (SImode, tmp)));
+  do_sub3 (result, data1, data2);
   emit_jump_insn (gen_jump (final_label));
   emit_barrier (); /* No fall-through.  */
 }
@@ -310,8 +308,7 @@ emit_strcmp_scalar_result_calculation_nonul (rtx result, rtx data1, rtx data2)
   rtx tmp = gen_reg_rtx (Xmode);
   emit_insn (gen_slt_3 (LTU, Xmode, Xmode, tmp, data1, data2));
   do_neg2 (tmp, tmp);
-  do_ior3 (tmp, tmp, const1_rtx);
-  emit_insn (gen_movsi (result, gen_lowpart (SImode, tmp)));
+  do_ior3 (result, tmp, const1_rtx);
 }
 
 /* strcmp-result calculation.
@@ -367,9 +364,7 @@ emit_strcmp_scalar_result_calculation (rtx result, rtx data1, rtx data2,
   unsigned int shiftr = (xlen - 1) * BITS_PER_UNIT;
   do_lshr3 (data1, data1, GEN_INT (shiftr));
   do_lshr3 (data2, data2, GEN_INT (shiftr));
-  rtx tmp = gen_reg_rtx (Xmode);
-  do_sub3 (tmp, data1, data2);
-  emit_insn (gen_movsi (result, gen_lowpart (SImode, tmp)));
+  do_sub3 (result, data1, data2);
 }
 
 /* Expand str(n)cmp using Zbb/TheadBb instructions.
@@ -444,7 +439,7 @@ riscv_expand_strcmp_scalar (rtx result, rtx src1, rtx src2,
   /* All compared and everything was equal.  */
   if (ncompare)
     {
-      emit_insn (gen_rtx_SET (result, gen_rtx_CONST_INT (SImode, 0)));
+      emit_insn (gen_rtx_SET (result, CONST0_RTX (GET_MODE (result))));
       emit_jump_insn (gen_jump (final_label));
       emit_barrier (); /* No fall-through.  */
     }
@@ -1512,7 +1507,7 @@ expand_strcmp (rtx result, rtx src1, rtx src2, rtx nbytes,
   if (with_length)
     emit_label (done);
 
-  emit_insn (gen_movsi (result, gen_lowpart (SImode, sub)));
+  emit_move_insn (result, sub);
   return true;
 }
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index c0c960353eb9..2e2379dfca4f 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4162,9 +4162,19 @@
   "riscv_inline_strncmp && !optimize_size
     && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)"
 {
-  if (riscv_expand_strcmp (operands[0], operands[1], operands[2],
+  rtx temp = gen_reg_rtx (word_mode);
+  if (riscv_expand_strcmp (temp, operands[1], operands[2],
                            operands[3], operands[4]))
-    DONE;
+    {
+      if (TARGET_64BIT)
+       {
+         temp = gen_lowpart (SImode, temp);
+         SUBREG_PROMOTED_VAR_P (temp) = 1;
+         SUBREG_PROMOTED_SET (temp, SRP_SIGNED);
+       }
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
   else
     FAIL;
 })
@@ -4183,9 +4193,19 @@
   "riscv_inline_strcmp && !optimize_size
     && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)"
 {
-  if (riscv_expand_strcmp (operands[0], operands[1], operands[2],
+  rtx temp = gen_reg_rtx (word_mode);
+  if (riscv_expand_strcmp (temp, operands[1], operands[2],
                            NULL_RTX, operands[3]))
-    DONE;
+    {
+      if (TARGET_64BIT)
+       {
+         temp = gen_lowpart (SImode, temp);
+         SUBREG_PROMOTED_VAR_P (temp) = 1;
+         SUBREG_PROMOTED_SET (temp, SRP_SIGNED);
+       }
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
   else
     FAIL;
 })

Reply via email to