https://gcc.gnu.org/g:d7434f3babc5a954fa68dd0c8ce6d4e917a017c1

commit r16-5036-gd7434f3babc5a954fa68dd0c8ce6d4e917a017c1
Author: Xi Ruoyao <[email protected]>
Date:   Sat Feb 22 15:34:54 2025 +0800

    LoongArch: Avoid unnecessary zero-initialization using LSX for scalar popcount
    
    Now for __builtin_popcountl we are getting things like
    
            vrepli.b        $vr0,0
            vinsgr2vr.d     $vr0,$r4,0
            vpcnt.d $vr0,$vr0
            vpickve2gr.du   $r4,$vr0,0
            slli.w  $r4,$r4,0
            jr  $r1
    
    The "vrepli.b" instruction is introduced by the init-regs pass (see
    PR61810 and all the issues it references).  To work around it, we can
    use a post-reload split instead of define_expand: the "f" constraint
    makes the compiler automatically move the scalar between GPR and FPR,
    and reload runs much later than init-regs, so init-regs won't get in
    our way.
    
    Now the code looks like:
    
            movgr2fr.d      $f0,$r4
            vpcnt.d $vr0,$vr0
            movfr2gr.d      $r4,$f0
            jr  $r1
    
    gcc/ChangeLog:
    
            * config/loongarch/loongarch.md (cntmap): Change to uppercase.
            (popcount<GPR:mode>2): Modify to a post reload split.

Diff:
---
 gcc/config/loongarch/loongarch.md | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 2f4817d885c8..ba668880ba56 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1773,21 +1773,23 @@
 
 ;; This attribute used for get connection of scalar mode and corresponding
 ;; vector mode.
-(define_mode_attr cntmap [(SI "v4si") (DI "v2di")])
+(define_mode_attr cntmap [(SI "V4SI") (DI "V2DI")])
 
-(define_expand "popcount<mode>2"
-  [(set (match_operand:GPR 0 "register_operand")
-       (popcount:GPR (match_operand:GPR 1 "register_operand")))]
+(define_insn_and_split "popcount<mode>2"
+  [(set (match_operand:GPR 0 "register_operand" "=f")
+       (popcount:GPR (match_operand:GPR 1 "register_operand" "f")))]
   "ISA_HAS_LSX"
+  "#"
+  ;; Do the split very lately to work around init-regs unneeded zero-
+  ;; initialization from init-regs.  See PR61810 and all the referenced
+  ;; issues.
+  "&& reload_completed"
+  [(set (match_operand:<cntmap> 0 "register_operand" "=f")
+       (popcount:<cntmap>
+         (match_operand:<cntmap> 1 "register_operand" "f")))]
 {
-  rtx in = operands[1];
-  rtx out = operands[0];
-  rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) :
-                                   gen_reg_rtx (V2DImode);
-  emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1)));
-  emit_insn (gen_popcount<cntmap>2 (vreg, vreg));
-  emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0)));
-  DONE;
+  operands[0] = gen_rtx_REG (<cntmap>mode, REGNO (operands[0]));
+  operands[1] = gen_rtx_REG (<cntmap>mode, REGNO (operands[1]));
 })
 
 ;;

Reply via email to