https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110008

            Bug ID: 110008
           Summary: early returns from functions result in suboptimal code
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: lh_mouse at 126 dot com
  Target Milestone: ---

https://gcc.godbolt.org/z/94Wf3Worq
```
/* Only declared here; no definition is visible in this snippet, so each use
   below has to be emitted as a real call. */
int complex_one(int, int);

/*
 * Reproducer: returns 0 immediately when `a` is zero; otherwise returns the
 * results of two complex_one() calls added to `a` and `b`.
 */
int
test(int a, int b, int c)
  {
    /* __builtin_expect(a, 0) evaluates to `a` and hints that its value is
       expected to be 0, i.e. this early return is the expected path. */
    if(__builtin_expect(a, 0) == 0)
      return 0;

    /* `a` and `b` are still needed after both calls, `c` after the first
       call, and the first result `r` after the second call. */
    int r = complex_one(a, b);
    r += complex_one(r, c);
    return r + a + b;
  }
```

GCC:
```
# GCC output for test(a, b, c). Arguments arrive in ecx, edx, r8d.
# NOTE(review): that register assignment looks like the Microsoft x64 calling
# convention (e.g. a mingw-w64 target) -- confirm against the godbolt link.
test:
        push    rdi                     # register saves happen before `a` is tested
        push    rsi
        push    rbx
        sub     rsp, 32                 # reserve 32 bytes; presumably the callee shadow space -- TODO confirm
        mov     ebx, ecx                # ebx = a
        mov     esi, edx                # esi = b
        test    ecx, ecx                # a == 0 ?
        jne     .L7                     # a != 0: take the path with the calls
        mov     eax, ebx                # early return: eax = a, which is 0 here
        add     rsp, 32                 # full epilogue runs on the early-return path too
        pop     rbx
        pop     rsi
        pop     rdi
        ret
.L7:
        mov     DWORD PTR 80[rsp], r8d  # spill c; 80[rsp] is above the return address (3 pushes + 32 = 56)
        call    complex_one             # complex_one(a, b); ecx/edx still hold a and b
        mov     edx, DWORD PTR 80[rsp]  # second argument = c (reloaded)
        mov     ecx, eax                # first argument = r
        mov     edi, eax                # keep r across the second call
        call    complex_one             # complex_one(r, c)
        add     edi, eax                # r += result of the second call
        add     ebx, edi                # a + r
        add     ebx, esi                # ... + b
        mov     eax, ebx                # return value
        add     rsp, 32
        pop     rbx
        pop     rsi
        pop     rdi
        ret
```

Clang:
```
# Clang output for test(a, b, c). Arguments arrive in edi, esi, edx.
# NOTE(review): that register assignment looks like the System V AMD64 calling
# convention -- a different target from the GCC listing; confirm on godbolt.
test:                                   # @test
        xor     eax, eax                # preset return value 0
        test    edi, edi                # a == 0 ?
        jne     .LBB0_1                 # a != 0: take the path with the calls
        ret                             # early return before any register is pushed
.LBB0_1:
        push    rbp                     # register saves happen only on this path
        push    r15
        push    r14
        push    rbx
        push    rax                     # never popped; presumably just moves rsp by 8 for alignment -- TODO confirm
        mov     r14d, edx               # r14d = c
        mov     ebx, esi                # ebx = b
        mov     ebp, edi                # ebp = a
        call    complex_one@PLT         # complex_one(a, b); edi/esi still hold a and b
        mov     r15d, eax               # r15d = r (first result)
        mov     edi, eax                # first argument = r
        mov     esi, r14d               # second argument = c
        call    complex_one@PLT         # complex_one(r, c)
        add     ebx, ebp                # b + a
        add     ebx, r15d               # ... + first result
        add     ebx, eax                # ... + second result
        mov     eax, ebx                # return value
        add     rsp, 8                  # discard the slot created by `push rax`
        pop     rbx
        pop     r14
        pop     r15
        pop     rbp
        ret
```

There are two issues with the code that GCC generates. The first is that GCC
apparently uses more space for temporary variables than Clang. The other is
that when `a` equals zero, Clang returns before the normal function prologue,
which pushes a lot of registers onto the stack, whereas GCC performs the check
after the prologue, so both the prologue and the epilogue get executed for
nothing.

Reply via email to