https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124654
--- Comment #2 from Hongtao Liu <liuhongt at gcc dot gnu.org> --- (In reply to Andrew Pinski from comment #1) > _65 = .ADD_OVERFLOW (_9, lo_22); > s_25 = REALPART_EXPR <_65>; > _66 = IMAGPART_EXPR <_65>; > c1_26 = _66 != 0; > _38 = .ADD_OVERFLOW (s_25, c_36); > s2_27 = REALPART_EXPR <_38>; > _1 = IMAGPART_EXPR <_38>; > c2_28 = _1 != 0; > MEM[(u64 *)r_29(D) + ivtmp.11_64 * 1] = s2_27; > _11 = (long long unsigned int) c1_26; > _12 = _11 + hi_23; > _13 = (long long unsigned int) c2_28; > c_31 = _12 + _13; I think one possible solution is to extend match_uaddc_usubc to handle the single carry case: (A + B) + carry or value + carry where we have exactly one carry. 82 <bb 4> [local count: 955630224]: 83 # c_36 = PHI <c_31(4), 0(3)> 84 # ivtmp.11_57 = PHI <ivtmp.11_58(4), 0(3)> 85 _4 = MEM[(const u64 *)x_19(D) + ivtmp.11_57 * 1]; 86 t_21 = _4 w* y_20(D); 87 lo_22 = (u64) t_21; 88 _7 = t_21 >> 64; 89 hi_23 = (u64) _7; 90 _9 = MEM[(const u64 *)p_24(D) + ivtmp.11_57 * 1]; 91 _65 = .ADD_OVERFLOW (_9, lo_22); 92 s_25 = REALPART_EXPR <_65>; 93 _66 = IMAGPART_EXPR <_65>; 94 c1_26 = _66 != 0; 95 _38 = .ADD_OVERFLOW (s_25, c_36); 96 s2_27 = REALPART_EXPR <_38>; 97 _1 = IMAGPART_EXPR <_38>; 98 c2_28 = _1 != 0; 99 MEM[(u64 *)r_29(D) + ivtmp.11_57 * 1] = s2_27; 100 _11 = (long long unsigned int) c1_26; 101 _2 = .UADDC (hi_23, 0, _11); 102 _12 = REALPART_EXPR <_2>; 103 _13 = (long long unsigned int) c2_28; 104 _3 = .UADDC (_12, 0, _13); 105 c_31 = REALPART_EXPR <_3>; 106 ivtmp.11_58 = ivtmp.11_57 + 8; 107 if (ivtmp.11_58 != _60) assembly: 30.L7: 31 movq %r8, %rdx 32 xorl %r10d, %r10d 33 mulx (%rbx,%rdi), %rsi, %r15 34 addq (%r9,%rdi), %rsi 35 setc %r10b 36 addq %rax, %rsi 37 movq %rsi, (%r11,%rdi) 38 movq %r15, %rax 39 adcq %r10, %rax 40 addq $8, %rdi 41 cmpq %rcx, %rdi 42 jne .L7
