https://gcc.gnu.org/bugzilla/show_bug.cgi?id=123230

            Bug ID: 123230
           Summary: Missed cross-function optimization involving a loop
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: 652023330028 at smail dot nju.edu.cn
  Target Milestone: ---

Hello,

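For the reduced test case below, GCC at -O3 appears to miss a cross-function
optimization opportunity in function j().
Since every element of h is always zero, the while loop in c() never advances
e, so a() is called exactly once (for l == 1) and j() can be optimized to a
direct call to a().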

If the loop condition is changed from l < 6 to l < 5 (as in j_optimized), GCC
performs the optimization as expected.

Reduced code (from the ABC project,
https://github.com/berkeley-abc/abc/blob/64637b8395a94aa68e67b211d4ecc21840accde1/src/map/if/ifDec07.c#L409):

https://godbolt.org/z/9oza5o7W3

/* Declared only; no definition of a() appears in this test case. */
void a();
/*
 * Starting at index e, advance e while e < 6 and h[e] is non-zero, then
 * call a() if e ended up equal to 1.
 *
 * e: starting index into h.
 * h: int array; it is only read at index e while e < 6.
 */
void c(int e, int h[]) {
    while (e < 6 && h[e])
        e++;
    /* Only a final value of e == 1 triggers the call. */
    if (1 == e)
        a();
}
/*
 * Calls c(l, h) for l = 0..5 on a zero-initialized six-element array.
 * Because every h[l] is zero, the scan in c() stops immediately and e keeps
 * its starting value, so only the l == 1 call reaches a().
 * This is the variant for which the report says GCC -O3 does not reduce the
 * function to a single call to a().
 */
void j() {
    int h[6] = {0};
    for (int l = 0; l < 6; l++){
        c(l, h);
    }
}
/*
 * Same as j() except that the loop bound is l < 5 instead of l < 6, so
 * c(l, h) is called for l = 0..4 only. The l == 1 call is still the only
 * one that reaches a(). The report states that GCC performs the expected
 * optimization for this variant.
 */
void j_optimized() {
    int h[6] = {0};
    for (int l = 0; l < 5; l++){
        c(l, h);
    }
}

Expected (Clang -O3):
j():
        # The whole function is a single tail jump to a() through the PLT.
        jmp     a()@PLT

GCC -O3:
"j()":
        # Prologue: save rbp and rbx, then reserve 40 bytes of stack.
        push    rbp
        pxor    xmm0, xmm0
        push    rbx
        # ebx starts at 1 and is compared against 7 at .L12 to end the loop;
        # it appears to track l + 1 from the C source.
        mov     ebx, 1
        sub     rsp, 40
        # Zero 24 bytes starting at [rsp]: 8 bytes at [rsp+16] here plus
        # 16 bytes via the movaps below. rbp is set to the start of that area
        # and is used as the element pointer for the rest of the function.
        mov     QWORD PTR [rsp+16], 0
        mov     rbp, rsp
        movaps  XMMWORD PTR [rsp], xmm0
        jmp     .L21
.L24:
        # Reached from .L21 when the DWORD at [rbp+0] is non-zero and
        # ebx != 6. Tests the following elements at [rbp+4], [rbp+8] and
        # [rbp+12]. Apart from the first zero test (which goes to .L23),
        # every branch in this chain targets .L11, as does the fall-through.
        mov     esi, DWORD PTR [rbp+4]
        test    esi, esi
        je      .L23
        cmp     ebx, 5
        je      .L11
        mov     ecx, DWORD PTR [rbp+8]
        test    ecx, ecx
        je      .L11
        cmp     ebx, 4
        je      .L11
        mov     edx, DWORD PTR [rbp+12]
        test    edx, edx
        je      .L11
        cmp     ebx, 3
        je      .L11
.L11:
        # Advance to the next iteration: bump the counter and move rbp
        # forward by 4 bytes (one DWORD), then fall into the loop head.
        add     rbx, 1
        add     rbp, 4
.L21:
        # Loop head: load the DWORD at [rbp+0]. Zero -> .L14; otherwise go to
        # .L24 unless ebx == 6, in which case fall through to .L12.
        mov     edi, DWORD PTR [rbp+0]
        test    edi, edi
        je      .L14
        cmp     ebx, 6
        jne     .L24
.L12:
        # Bump the counter; continue at .L25 until rbx reaches 7, then
        # release the stack frame, restore rbx/rbp and return.
        add     rbx, 1
        cmp     rbx, 7
        jne     .L25
        add     rsp, 40
        pop     rbx
        pop     rbp
        ret
.L14:
        # The element at [rbp+0] was zero: call a() only when ebx == 2.
        cmp     ebx, 2
        jne     .L12
        call    "a()"
        jmp     .L12
.L23:
        # The element at [rbp+4] was zero: call a() only when ebx == 1, then
        # advance the counter and pointer the same way .L11 does.
        cmp     ebx, 1
        jne     .L11
        call    "a()"
        add     rbx, 1
        add     rbp, 4
        jmp     .L21
.L25:
        # Move rbp forward by 4 bytes and return to the loop head.
        add     rbp, 4
        jmp     .L21

Reply via email to