https://gcc.gnu.org/g:d7434f3babc5a954fa68dd0c8ce6d4e917a017c1
commit r16-5036-gd7434f3babc5a954fa68dd0c8ce6d4e917a017c1 Author: Xi Ruoyao <[email protected]> Date: Sat Feb 22 15:34:54 2025 +0800 LoongArch: Avoid unnecessary zero-initialization using LSX for scalar popcount Now for __builtin_popcountl we are getting things like vrepli.b $vr0,0 vinsgr2vr.d $vr0,$r4,0 vpcnt.d $vr0,$vr0 vpickve2gr.du $r4,$vr0,0 slli.w $r4,$r4,0 jr $r1 The "vrepli.b" instruction is introduced by the init-regs pass (see PR61810 and all the issues it references). To work around it, we can use a post-reload split instead of define_expand: the "f" constraint will make the compiler automatically move the scalar between GPR and FPR, and reload runs much later than init-regs so init-regs won't get in our way. Now the code looks like: movgr2fr.d $f0,$r4 vpcnt.d $vr0,$vr0 movfr2gr.d $r4,$f0 jr $r1 gcc/ChangeLog: * config/loongarch/loongarch.md (cntmap): Change to uppercase. (popcount<GPR:mode>2): Modify to a post reload split. Diff: --- gcc/config/loongarch/loongarch.md | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 2f4817d885c8..ba668880ba56 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -1773,21 +1773,23 @@ ;; This attribute used for get connection of scalar mode and corresponding ;; vector mode. -(define_mode_attr cntmap [(SI "v4si") (DI "v2di")]) +(define_mode_attr cntmap [(SI "V4SI") (DI "V2DI")]) -(define_expand "popcount<mode>2" - [(set (match_operand:GPR 0 "register_operand") - (popcount:GPR (match_operand:GPR 1 "register_operand")))] +(define_insn_and_split "popcount<mode>2" + [(set (match_operand:GPR 0 "register_operand" "=f") + (popcount:GPR (match_operand:GPR 1 "register_operand" "f")))] "ISA_HAS_LSX" + "#" + ;; Do the split very lately to work around init-regs unneeded zero- + ;; initialization from init-regs. See PR61810 and all the referenced + ;; issues. 
+ "&& reload_completed" + [(set (match_operand:<cntmap> 0 "register_operand" "=f") + (popcount:<cntmap> + (match_operand:<cntmap> 1 "register_operand" "f")))] { - rtx in = operands[1]; - rtx out = operands[0]; - rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) : - gen_reg_rtx (V2DImode); - emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1))); - emit_insn (gen_popcount<cntmap>2 (vreg, vreg)); - emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0))); - DONE; + operands[0] = gen_rtx_REG (<cntmap>mode, REGNO (operands[0])); + operands[1] = gen_rtx_REG (<cntmap>mode, REGNO (operands[1])); }) ;;
