https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89152
Bug ID: 89152
Summary: Wrapping values in structures can make the optimizer
blind
Product: gcc
Version: 9.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: [email protected]
Target Milestone: ---
GCC compiles the following C module
**
/* Cont: pointer to a function that takes an untyped pointer and an int
   and returns nothing.  */
typedef void (*Cont) (void *f, int a);
int quux (int a);
/* Calls quux (a); if the result is nonzero, recurses with a + 1, passing
   c and d through unchanged.  On every path it then invokes c with d and a
   as arguments.  */
static void g (Cont c, Cont d, int a)
{
if (quux (a))
g (c, d, a + 1);
/* Executed after the recursive call (if any) has returned, so the
   recursion above is not a tail call.  */
((Cont) c) (d, a);
}
void bar (Cont, int a);
/* Invokes d with itself and a as arguments, unless d compares equal to
   bar (cast to Cont), in which case nothing is done.  */
static void h (Cont d, int a)
{
if (d != (Cont) bar)
((Cont) d) (d, a);
}
/* Externally visible entry point: runs g with h as c and bar as d, both
   cast to Cont.  With these arguments every `c (d, a)' call made by g is
   h (bar, a), whose `d != (Cont) bar' test is false, so that call does
   nothing.  */
void foo (int a)
{
g ((Cont) h, (Cont) bar, a);
}
**
to
**
h:
cmpq $bar, %rdi
je .L1
jmp *%rdi
.L1:
ret
g.constprop.0:
pushq %rbx
movl %edi, %ebx
.L6:
movl %ebx, %edi
call quux
testl %eax, %eax
jne .L8
popq %rbx
ret
.L8:
addl $1, %ebx
jmp .L6
foo:
jmp g.constprop.0
**
Apart from the fact that `h' should be eliminated (see bug ipa/89139), the
resulting code looks rather optimal (except, perhaps, for the unnecessary jump
from foo into g.constprop.0).
However, when I wrap the function pointers that are passed around in a
structure (to avoid having to do typecasts in the code above, for example) as
in the following module
**
/* Cont: a structure whose only member f is a pointer to a function that
   takes a struct cont by value and an int and returns nothing.  */
typedef struct cont
{
void (*f) (struct cont, int a);
} Cont;
int quux (int a);
/* Calls quux (a); if the result is nonzero, recurses with a + 1, passing
   c and d through unchanged.  On every path it then calls the function
   stored in c.f with d (by value) and a as arguments.  */
static void g (Cont c, Cont d, int a)
{
if (quux (a))
g (c, d, a + 1);
/* Executed after the recursive call (if any) has returned, so the
   recursion above is not a tail call.  */
c.f (d, a);
}
void bar (struct cont, int a);
/* Calls the function stored in d.f with d and a as arguments, unless
   d.f compares equal to bar, in which case nothing is done.  */
static void h (Cont d, int a)
{
if (d.f != bar)
d.f (d, a);
}
/* Externally visible entry point: runs g with two Cont compound literals,
   one whose f member is h (passed as c) and one whose f member is bar
   (passed as d).  With these arguments every `c.f (d, a)' call made by g
   is h (d, a) with d.f == bar, so h's `d.f != bar' test is false and the
   call does nothing.  */
void foo (int a)
{
g ((Cont) { h }, (Cont) { bar }, a);
}
**
a lot of optimizations are missed:
**
h:
cmpq $bar, %rdi
je .L1
jmp *%rdi
.L1:
ret
g.constprop.0:
pushq %r13
movq %rdi, %r13
movl %edx, %edi
pushq %r12
movq %rsi, %r12
pushq %rbp
movl %edx, %ebp
call quux
testl %eax, %eax
jne .L10
movl %ebp, %esi
movq %r12, %rdi
popq %rbp
popq %r12
popq %r13
jmp h
.L10:
movq %r12, %rsi
movq %r13, %rdi
leal 1(%rbp), %edx
call g.constprop.0
movl %ebp, %esi
movq %r12, %rdi
popq %rbp
popq %r12
popq %r13
jmp h
foo:
movl %edi, %edx
movl $bar, %esi
movl $h, %edi
jmp g.constprop.0
**
Both compilations were done at -O2; -O3 makes things no better.
For comparison, clang also optimizes the latter code, namely to:
**
foo: # @foo
pushq %rbx
movl %edi, %ebx
.LBB0_1: # =>This Inner Loop Header: Depth=1
movl %ebx, %edi
callq quux
addl $1, %ebx
testl %eax, %eax
jne .LBB0_1
popq %rbx
retq
**