https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67856

            Bug ID: 67856
           Summary: callee-saved register saves should be shrink-wrapped
           Product: gcc
           Version: 5.1.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: luto at mit dot edu
  Target Milestone: ---

This code:

/* Define bool directly in terms of the built-in _Bool type. */
typedef _Bool bool;

/* Declared here, defined elsewhere; func() below only inspects its return value. */
extern int a(void);

/*
 * Used as a proxy for real code: x is volatile, so every read and write of
 * x in func() is a separate access.
 */
volatile int x;

/*
 * Reproducer for the missed optimization.
 *
 * Calls a() in a loop until the low five bits of its result (mask 31) are
 * all clear, then returns 0. Whenever any of those bits is set, the loop
 * body reads x eleven times into separate temporaries and then writes the
 * eleven values back, so eleven values are live at once.
 *
 * The regs parameter is not used.
 */
bool func(void *regs)
{
        int t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11;

        while (1) {
                int cached_flags = a();

                /*
                 * The masked value is hinted to be 0, so leaving the loop is
                 * the expected path and the body below is the unlikely one.
                 */
                if (!__builtin_expect(cached_flags & 31, 0))
                        break;

                /* Eleven reads of x, one per temporary. */
                t1 = x;
                t2 = x;
                t3 = x;
                t4 = x;
                t5 = x;
                t6 = x;
                t7 = x;
                t8 = x;
                t9 = x;
                t10 = x;
                t11 = x;

                /* Eleven writes back to x, in the same order as the reads. */
                x = t1;
                x = t2;
                x = t3;
                x = t4;
                x = t5;
                x = t6;
                x = t7;
                x = t8;
                x = t9;
                x = t10;
                x = t11;
        }

        return 0;
}

generates (gcc -O2 -S):

        # Compiler output for func() (x86-64, AT&T syntax). The "#" comment
        # lines are reviewer annotations; all other lines are as emitted.
        .file   "ra.c"
        .section        .text.unlikely,"ax",@progbits
.LCOLDB0:
        .text
.LHOTB0:
        .p2align 4,,15
        .globl  func
        .type   func, @function
func:
.LFB0:
        .cfi_startproc
        # Prologue: %rbp and %rbx are saved unconditionally on entry, although
        # they are only used in the .L6 block below.
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        pushq   %rbx
        .cfi_def_cfa_offset 24
        .cfi_offset 3, -24
        # Extra 8 bytes: presumably keeps %rsp 16-byte aligned at the call to a.
        subq    $8, %rsp
        .cfi_def_cfa_offset 32
.L3:
        # Loop head: call a() and test the low five bits of its result.
        call    a
        testb   $31, %al
        # Any of those bits set: take the branch to the loop body at .L6.
        jne     .L6
        # Fall-through (expected) path: undo the prologue and return 0.
        addq    $8, %rsp
        .cfi_remember_state
        .cfi_def_cfa_offset 24
        xorl    %eax, %eax
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
        .p2align 4,,10
        .p2align 3
.L6:
        .cfi_restore_state
        # Loop body: eleven loads of x into eleven registers. The first two
        # go into %ebp and %ebx, the only uses of the saved registers.
        movl    x(%rip), %ebp
        movl    x(%rip), %ebx
        movl    x(%rip), %r11d
        movl    x(%rip), %r10d
        movl    x(%rip), %r9d
        movl    x(%rip), %r8d
        movl    x(%rip), %edi
        movl    x(%rip), %esi
        movl    x(%rip), %ecx
        movl    x(%rip), %edx
        movl    x(%rip), %eax
        # Eleven stores back to x, in the same order as the loads.
        movl    %ebp, x(%rip)
        movl    %ebx, x(%rip)
        movl    %r11d, x(%rip)
        movl    %r10d, x(%rip)
        movl    %r9d, x(%rip)
        movl    %r8d, x(%rip)
        movl    %edi, x(%rip)
        movl    %esi, x(%rip)
        movl    %ecx, x(%rip)
        movl    %edx, x(%rip)
        movl    %eax, x(%rip)
        # Back to the loop head to call a() again.
        jmp     .L3
        .cfi_endproc
.LFE0:
        .size   func, .-func
        .section        .text.unlikely
.LCOLDE0:
        .text
.LHOTE0:
        .comm   x,4,4
        .ident  "GCC: (GNU) 5.1.1 20150618 (Red Hat 5.1.1-4)"
        .section        .note.GNU-stack,"",@progbits

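The unconditional pushes of rbp and rbx are a missed optimization: they should
be sunk into the cold code (the block at .L6) that actually uses those
registers, so that the expected path does not pay for saving and restoring them.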

Reply via email to