https://gcc.gnu.org/g:12b4b56fd10e64e9703ac22af63b02c108ee402c
commit r16-7651-g12b4b56fd10e64e9703ac22af63b02c108ee402c
Author: Sandra Loosemore <[email protected]>
Date:   Mon Feb 23 16:10:01 2026 +0000

    amdgcn: prefer scalar ALU instructions over vector

    This patch fixes a performance regression that crept in with commit
    r16-2237-g0eee2dd2865 ("Don't clobber VCC if we don't need to").
    Removing the clobber from the vector-register ALU insn pattern
    alternatives seems to make LRA treat those alternatives as less costly
    than the scalar-register alternatives, which do need the clobber, so it
    picks the vector forms instead and emits extra move instructions.

    In this patch I've adjusted all the ALU patterns (not just the ones
    modified by the above-mentioned commit) where the scalar-register
    alternatives have a clobber and the vector-register alternatives don't,
    slightly disparaging the latter with the "?" constraint modifier.

    I also fixed a related issue: the comment on addptrdi3 says the
    scalar-register alternative is preferred, but it was listed after the
    vector alternative, so other things being equal the vector expansion
    would still win.  I've not changed the constraints, but simply flipped
    the order of the alternatives.

    Thanks to Tobias Burnus for identifying the commit that caused the
    performance regression, Andrew Stubbs for making the connection between
    clobbers and LRA costs, and Arsen Arsenović for setting up the
    benchmark scripting I used to compare results.

    gcc/ChangeLog

    	* config/gcn/gcn.md (addsi3): Disparage vector alternative.
    	(addptrdi3): Reverse order of vector and scalar alternatives
    	to prefer the latter.
    	(subsi3): Disparage vector alternatives.
    	(mulsi3): Likewise.
    	(muldi3): Likewise.
    	(bitunop <expander>si2): Likewise.
    	(vec_and_scalar_com <expander>si3): Likewise.
    	(vec_and_scalar_nocom <expander>si3): Likewise.
    	(one_cmpldi2): Likewise.
    	(vec_and_scalar64_com <expander>di3): Likewise.
    	(vec_and_scalar64_nocom <expander>di3): Likewise.

Diff:
---
 gcc/config/gcn/gcn.md | 52 +++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 5957b29f7487..c49bd6cf8bd2 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -1127,7 +1127,7 @@
 ;; {{{ ALU special cases: Plus
 (define_insn "addsi3"
-  [(set (match_operand:SI 0 "register_operand"          "= Sg, Sg, Sg,   v")
+  [(set (match_operand:SI 0 "register_operand"          "= Sg, Sg, Sg,  ?v")
 	(plus:SI (match_operand:SI 1 "gcn_alu_operand"   "%SgA,  0,SgA,   v")
 		 (match_operand:SI 2 "gcn_alu_operand"   " SgA,SgJ,  B,vBSv")))
    (clobber (match_scratch:BI 3                          "= cs, cs, cs,  X"))]
   ""
@@ -1285,23 +1285,14 @@
 ; The SGPR alternative is preferred as it is typically used with mov_sgprbase.
(define_insn "addptrdi3" - [(set (match_operand:DI 0 "register_operand" "= v, Sg") + [(set (match_operand:DI 0 "register_operand" "= Sg, v") (unspec:DI [ - (plus:DI (match_operand:DI 1 "register_operand" "^v0,Sg0") - (match_operand:DI 2 "nonmemory_operand" "vDA,SgDB"))] + (plus:DI (match_operand:DI 1 "register_operand" " Sg0,^v0") + (match_operand:DI 2 "nonmemory_operand" "SgDB,vDA"))] UNSPEC_ADDPTR))] "" { if (which_alternative == 0) - { - rtx new_operands[4] = { operands[0], operands[1], operands[2], - gen_rtx_REG (DImode, CC_SAVE_REG) }; - - output_asm_insn ("v_add_co_u32\t%L0, %3, %L2, %L1", new_operands); - output_asm_insn ("{v_addc_co_u32|v_add_co_ci_u32}\t%H0, %3, %H2, %H1, %3", - new_operands); - } - else { rtx new_operands[4] = { operands[0], operands[1], operands[2], gen_rtx_REG (BImode, CC_SAVE_REG) }; @@ -1311,17 +1302,26 @@ output_asm_insn ("s_addc_u32\t%H0, %H1, %H2", new_operands); output_asm_insn ("s_cmpk_lg_u32\t%3, 0", new_operands); } + else + { + rtx new_operands[4] = { operands[0], operands[1], operands[2], + gen_rtx_REG (DImode, CC_SAVE_REG) }; + + output_asm_insn ("v_add_co_u32\t%L0, %3, %L2, %L1", new_operands); + output_asm_insn ("{v_addc_co_u32|v_add_co_ci_u32}\t%H0, %3, %H2, %H1, %3", + new_operands); + } return ""; } - [(set_attr "type" "vmult,mult") - (set_attr "length" "16,24")]) + [(set_attr "type" "mult,vmult") + (set_attr "length" "24,16")]) ;; }}} ;; {{{ ALU special cases: Minus (define_insn "subsi3" - [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v, v") + [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, ?v, ?v") (minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgA, v,vBSv") (match_operand:SI 2 "gcn_alu_operand" "SgA, B, vBSv, v"))) (clobber (match_scratch:BI 3 "=cs, cs, X, X"))] @@ -1427,7 +1427,7 @@ ; The "s_mulk_i32" variant sets SCC to indicate overflow (which we don't care ; about here, but we need to indicate the clobbering). 
(define_insn "mulsi3" - [(set (match_operand:SI 0 "register_operand" "= Sg,Sg, Sg, v") + [(set (match_operand:SI 0 "register_operand" "= Sg,Sg, Sg, ?v") (mult:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, 0,SgA, v") (match_operand:SI 2 "gcn_alu_operand" " SgA, J, B,vASv"))) (clobber (match_scratch:BI 3 "=X,cs, X, X"))] @@ -1556,9 +1556,9 @@ }) (define_insn_and_split "muldi3" - [(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg, &v,&v") - (mult:DI (match_operand:DI 1 "register_operand" "%Sg, Sg, v, v") - (match_operand:DI 2 "nonmemory_operand" "Sg, i,vSv, A"))) + [(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg,?&v,?&v") + (mult:DI (match_operand:DI 1 "register_operand" "%Sg, Sg, v, v") + (match_operand:DI 2 "nonmemory_operand" "Sg, i,vSv, A"))) (clobber (match_scratch:SI 3 "=&Sg,&Sg,&v,&v")) (clobber (match_scratch:BI 4 "=cs, cs, X, X"))] "" @@ -1613,7 +1613,7 @@ (define_code_attr popcount_extra_op [(not "") (popcount ", 0")]) (define_insn "<expander>si2" - [(set (match_operand:SI 0 "register_operand" "=Sg, v") + [(set (match_operand:SI 0 "register_operand" "=Sg, ?v") (bitunop:SI (match_operand:SI 1 "gcn_alu_operand" "SgB,vSvB"))) (clobber (match_scratch:BI 2 "=cs, X"))] @@ -1691,7 +1691,7 @@ (define_code_iterator vec_and_scalar_nocom [ashift lshiftrt ashiftrt]) (define_insn "<expander>si3" - [(set (match_operand:SI 0 "gcn_valu_dst_operand" "= Sg, v,RD") + [(set (match_operand:SI 0 "gcn_valu_dst_operand" "= Sg, ?v,?RD") (vec_and_scalar_com:SI (match_operand:SI 1 "gcn_valu_src0_operand" "%SgA,vSvB, 0") (match_operand:SI 2 "gcn_alu_operand" " SgB, v, v"))) @@ -1705,7 +1705,7 @@ (set_attr "length" "8")]) (define_insn "<expander>si3" - [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v") + [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, ?v") (vec_and_scalar_nocom:SI (match_operand:SI 1 "gcn_alu_operand" "SgB,SgA, v") (match_operand:SI 2 "gcn_alu_operand" "SgA,SgB,vSvB"))) @@ -1731,7 +1731,7 @@ ;; {{{ ALU: generic 64-bit (define_insn_and_split "one_cmpldi2" - [(set (match_operand:DI 0 "register_operand" "=Sg, v") + [(set (match_operand:DI 0 "register_operand" "=Sg, ?v") (not:DI (match_operand:DI 1 "gcn_alu_operand" "SgA,vSvDB"))) (clobber (match_scratch:BI 2 "=cs, X"))] "" @@ -1753,7 +1753,7 @@ (define_code_iterator vec_and_scalar64_com [and ior xor]) (define_insn_and_split "<expander>di3" - [(set (match_operand:DI 0 "register_operand" "= Sg, v") + [(set (match_operand:DI 0 "register_operand" "= Sg, ?v") (vec_and_scalar64_com:DI (match_operand:DI 1 "gcn_alu_operand" "%SgA,vSvDB") (match_operand:DI 2 "gcn_alu_operand" " SgC, v"))) @@ -1781,7 +1781,7 @@ (set_attr "length" "8")]) (define_insn "<expander>di3" - [(set (match_operand:DI 0 "register_operand" "=Sg, Sg, v") + [(set (match_operand:DI 0 "register_operand" "=Sg, Sg, ?v") (vec_and_scalar_nocom:DI (match_operand:DI 1 "gcn_alu_operand" "SgC,SgA, v") (match_operand:SI 2 "gcn_alu_operand" "SgA,SgC,vSvC")))
