
GCC 10.2.0 (and GCC 8.3; other versions and targets except i386 and
amd64 not tested) generates rather bad code for the following ternary
expression:

--- dummy.c ---
#define NULL (char *) 0

char *dummy(char *string, long count) {
    return count == 0 ? NULL : string + 1;
}
--- EOF ---

$ gcc -m64 -o- -O3 -S dummy.c

        addq    $1, %rdi
        movl    $0, %eax
        testq   %rsi, %rsi
        cmovne  %rdi, %rax

JFTR: why does GCC NOT generate the shorter "XOR %eax, %eax" here?

$ gcc -m64 -O3 -c dummy.c
$ objdump -D dummy.o

0000000000000000 <dummy>:
   0: 48 83 c7 01           add    $0x1,%rdi
   4: b8 00 00 00 00        mov    $0x0,%eax
   9: 48 85 f6              test   %rsi,%rsi
   c: 48 0f 45 c7           cmovne %rdi,%rax
  10: c3                    retq   

i386 and AMD64 use the ILP32 and LP64 data models, respectively, where
a "long" and a "pointer" have the same size, and 0L and the null pointer
have the same binary representation, so the contents of RSI should be
used to load RAX with 0 conditionally:

        leaq    1(%rdi), %rax
        testq   %rsi, %rsi
        cmoveq  %rsi, %rax

$ gcc -m32 -o- -O3 -S dummy.c

        movl   8(%esp), %edx
        movl   4(%esp), %eax
        addl   $1, %eax
        testl  %edx, %edx
        movl   $0, %edx
        cmove  %edx, %eax    # OUCH: if this executes, EDX was 0 before,
        ret                  #       so the MOV is really a NOP!

$ gcc -m32 -O3 -c dummy.c
$ objdump -D dummy.o

00000000 <_dummy>:
   0:   8b 54 24 08             mov    0x8(%esp),%edx
   4:   8b 44 24 04             mov    0x4(%esp),%eax
   8:   83 c0 01                add    $0x1,%eax
   b:   85 d2                   test   %edx,%edx
   d:   ba 00 00 00 00          mov    $0x0,%edx
  12:   0f 44 c2                cmove  %edx,%eax
  15:   c3                      ret    

Here's what GCC should generate instead:

00000000 <_dummy>:
   0:   8b 44 24 04             mov    0x4(%esp),%eax
   4:   8b 4c 24 08             mov    0x8(%esp),%ecx
   8:   40                      inc    %eax
   9:   f7 d9                   neg    %ecx
   b:   19 c9                   sbb    %ecx,%ecx
   d:   21 c8                   and    %ecx,%eax
   f:   c3                      ret    

For (pre)historic processors which don't support CMOVcc the
following code is generated:

$ gcc -m32 -mtune=i386 -o- -O3 -S dummy.c

        movl    8(%esp), %eax
        testl   %eax, %eax
        je      L3
        movl    4(%esp), %eax
        incl    %eax
        ret
        .p2align 2
L3:                        # OUCH: EAX is already 0 here!
        xorl    %eax, %eax
        ret

00000000 <dummy>:
   0:   8b 44 24 08             mov    0x8(%esp),%eax
   4:   85 c0                   test   %eax,%eax
   6:   74 08                   je     10 <dummy+0x10>
   8:   8b 44 24 04             mov    0x4(%esp),%eax
   c:   40                      inc    %eax
   d:   c3                      ret
   e:   66 90                   xchg   %ax,%ax
  10:   31 c0                   xor    %eax,%eax
  12:   c3                      ret

not amused
Stefan Kanthak

