https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67856
Bug ID: 67856
Summary: callee-saved register saves should be shrink-wrapped
Product: gcc
Version: 5.1.1
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: rtl-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: luto at mit dot edu
Target Milestone: ---

This code:

    typedef _Bool bool;

    extern int a(void);

    /* used as a proxy for real code. */
    volatile int x;

    bool func(void *regs)
    {
        int t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11;

        while (1) {
            int cached_flags = a();
            if (!__builtin_expect(cached_flags & 31, 0))
                break;

            t1 = x;
            t2 = x;
            t3 = x;
            t4 = x;
            t5 = x;
            t6 = x;
            t7 = x;
            t8 = x;
            t9 = x;
            t10 = x;
            t11 = x;

            x = t1;
            x = t2;
            x = t3;
            x = t4;
            x = t5;
            x = t6;
            x = t7;
            x = t8;
            x = t9;
            x = t10;
            x = t11;
        }

        return 0;
    }

generates (gcc -O2 -S):

        .file   "ra.c"
        .section        .text.unlikely,"ax",@progbits
    .LCOLDB0:
        .text
    .LHOTB0:
        .p2align 4,,15
        .globl  func
        .type   func, @function
    func:
    .LFB0:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        pushq   %rbx
        .cfi_def_cfa_offset 24
        .cfi_offset 3, -24
        subq    $8, %rsp
        .cfi_def_cfa_offset 32
    .L3:
        call    a
        testb   $31, %al
        jne     .L6
        addq    $8, %rsp
        .cfi_remember_state
        .cfi_def_cfa_offset 24
        xorl    %eax, %eax
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
        .p2align 4,,10
        .p2align 3
    .L6:
        .cfi_restore_state
        movl    x(%rip), %ebp
        movl    x(%rip), %ebx
        movl    x(%rip), %r11d
        movl    x(%rip), %r10d
        movl    x(%rip), %r9d
        movl    x(%rip), %r8d
        movl    x(%rip), %edi
        movl    x(%rip), %esi
        movl    x(%rip), %ecx
        movl    x(%rip), %edx
        movl    x(%rip), %eax
        movl    %ebp, x(%rip)
        movl    %ebx, x(%rip)
        movl    %r11d, x(%rip)
        movl    %r10d, x(%rip)
        movl    %r9d, x(%rip)
        movl    %r8d, x(%rip)
        movl    %edi, x(%rip)
        movl    %esi, x(%rip)
        movl    %ecx, x(%rip)
        movl    %edx, x(%rip)
        movl    %eax, x(%rip)
        jmp     .L3
        .cfi_endproc
    .LFE0:
        .size   func, .-func
        .section        .text.unlikely
    .LCOLDE0:
        .text
    .LHOTE0:
        .comm   x,4,4
        .ident  "GCC: (GNU) 5.1.1 20150618 (Red Hat 5.1.1-4)"
        .section        .note.GNU-stack,"",@progbits

The unconditional pushes of rbp and rbx are a missed optimization: they should be sunk into the
cold code (the .L6 loop body) that actually needs them pushed.