https://gcc.gnu.org/bugzilla/show_bug.cgi?id=123524
--- Comment #3 from mikulas at artax dot karlin.mff.cuni.cz ---
Andrew Pinski: I tried to use __builtin_add_overflow and it doesn't help. On
gcc-16, I get this:
18e12: 41 0f b6 4d 02 movzbl 0x2(%r13),%ecx
18e17: 41 0f b6 55 03 movzbl 0x3(%r13),%edx
18e1c: 41 0f b6 7d 04 movzbl 0x4(%r13),%edi
18e21: 0f b6 34 13 movzbl (%rbx,%rdx,1),%esi
18e25: 40 0a 34 0b or (%rbx,%rcx,1),%sil
18e29: 0f 85 fb a3 02 00 jne 4322a <u_run+0x3e5aa>
18e2f: 48 8d 34 cb lea (%rbx,%rcx,8),%rsi
18e33: 48 8d 0c d3 lea (%rbx,%rdx,8),%rcx
18e37: 48 8b 16 mov (%rsi),%rdx
18e3a: 48 03 11 add (%rcx),%rdx
18e3d: 48 89 d1 mov %rdx,%rcx
18e40: 0f 80 e4 a3 02 00 jo 4322a <u_run+0x3e5aa>
18e46: 40 0f b6 d7 movzbl %dil,%edx
18e4a: 48 89 0c d3 mov %rcx,(%rbx,%rdx,8)
18e4e: 41 0f b7 55 06 movzwl 0x6(%r13),%edx
18e53: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 18e5a <u_run+0x141da>
18e5a: 49 83 c5 06 add $0x6,%r13
18e5e: 48 8b 04 d0 mov (%rax,%rdx,8),%rax
18e62: e9 59 be fe ff jmp 4cc0 <u_run+0x40>
4cc0: ba 02 00 02 00 mov $0x20002,%edx
4cc5: 66 0f 6e e2 movd %edx,%xmm4
4cc9: 66 0f 70 fc 00 pshufd $0x0,%xmm4,%xmm7
4cce: 0f 29 3c 24 movaps %xmm7,(%rsp)
4cd2: ff e0 jmp *%rax
On gcc-15, I get this with __builtin_add_overflow:
3152e: 41 0f b6 4c 24 02 movzbl 0x2(%r12),%ecx
31534: 41 0f b6 54 24 03 movzbl 0x3(%r12),%edx
3153a: 41 0f b6 74 24 04 movzbl 0x4(%r12),%esi
31540: 0f b6 3c 13 movzbl (%rbx,%rdx,1),%edi
31544: 40 0a 3c 0b or (%rbx,%rcx,1),%dil
31548: 0f 85 bc d2 ff ff jne 2e80a <u_run+0x29bea>
3154e: 48 8b 0c cb mov (%rbx,%rcx,8),%rcx
31552: 48 03 0c d3 add (%rbx,%rdx,8),%rcx
31556: 0f 80 ae d2 ff ff jo 2e80a <u_run+0x29bea>
3155c: 40 0f b6 d6 movzbl %sil,%edx
31560: 48 89 0c d3 mov %rcx,(%rbx,%rdx,8)
31564: 41 0f b7 54 24 06 movzwl 0x6(%r12),%edx
3156a: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 31571 <u_run+0x2c951>
31571: 49 83 c4 06 add $0x6,%r12
31575: ff 24 d0 jmp *(%rax,%rdx,8)
The gcc-15 code with __builtin_add_overflow has one more instruction
("movzbl %sil,%edx") than the code generated with asm goto, so I'll keep using
asm goto. (__builtin_add_overflow is used on other architectures.)