https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124654
--- Comment #2 from Hongtao Liu <liuhongt at gcc dot gnu.org> --- (In reply to Andrew Pinski from comment #1) > _65 = .ADD_OVERFLOW (_9, lo_22); > s_25 = REALPART_EXPR <_65>; > _66 = IMAGPART_EXPR <_65>; > c1_26 = _66 != 0; > _38 = .ADD_OVERFLOW (s_25, c_36); > s2_27 = REALPART_EXPR <_38>; > _1 = IMAGPART_EXPR <_38>; > c2_28 = _1 != 0; > MEM[(u64 *)r_29(D) + ivtmp.11_64 * 1] = s2_27; > _11 = (long long unsigned int) c1_26; > _12 = _11 + hi_23; > _13 = (long long unsigned int) c2_28; > c_31 = _12 + _13; I think one possible solution is to extend match_uaddc_usubc to handle the single carry case: (A + B) + carry or value + carry where we have exactly one carry. 82 <bb 4> [local count: 955630224]: 83 # c_36 = PHI <c_31(4), 0(3)> 84 # ivtmp.11_57 = PHI <ivtmp.11_58(4), 0(3)> 85 _4 = MEM[(const u64 *)x_19(D) + ivtmp.11_57 * 1]; 86 t_21 = _4 w* y_20(D); 87 lo_22 = (u64) t_21; 88 _7 = t_21 >> 64; 89 hi_23 = (u64) _7; 90 _9 = MEM[(const u64 *)p_24(D) + ivtmp.11_57 * 1]; 91 _65 = .ADD_OVERFLOW (_9, lo_22); 92 s_25 = REALPART_EXPR <_65>; 93 _66 = IMAGPART_EXPR <_65>; 94 c1_26 = _66 != 0; 95 _38 = .ADD_OVERFLOW (s_25, c_36); 96 s2_27 = REALPART_EXPR <_38>; 97 _1 = IMAGPART_EXPR <_38>; 98 c2_28 = _1 != 0; 99 MEM[(u64 *)r_29(D) + ivtmp.11_57 * 1] = s2_27; 100 _11 = (long long unsigned int) c1_26; 101 _2 = .UADDC (hi_23, 0, _11); 102 _12 = REALPART_EXPR <_2>; 103 _13 = (long long unsigned int) c2_28; 104 _3 = .UADDC (_12, 0, _13); 105 c_31 = REALPART_EXPR <_3>; 106 ivtmp.11_58 = ivtmp.11_57 + 8; 107 if (ivtmp.11_58 != _60) assembly: 30.L7: 31 movq %r8, %rdx 32 xorl %r10d, %r10d 33 mulx (%rbx,%rdi), %rsi, %r15 34 addq (%r9,%rdi), %rsi 35 setc %r10b 36 addq %rax, %rsi 37 movq %rsi, (%r11,%rdi) 38 movq %r15, %rax 39 adcq %r10, %rax 40 addq $8, %rdi 41 cmpq %rcx, %rdi 42 jne .L7
