https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77896

            Bug ID: 77896
           Summary: Object vtable lookups are not hoisted out of loops
           Product: gcc
           Version: 6.2.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: scovich at gmail dot com
  Target Milestone: ---

C++ virtual function calls normally require two memory loads followed by an
indirect call: one load fetches the vtable pointer from the object, a second
fetches the function address from the vtable, and the indirect call then
invokes the function.

Given that an object's vtable is fixed over its lifetime, and the contents of a
given vtable are compile-time constant, I would expect the vtable lookups to be
hoisted out of loops when appropriate. For example:

==== foo.cpp =======
// Abstract interface: frob() is pure virtual, so calls made through a Foo*
// are dispatched on the dynamic type of the object.
struct Foo { virtual void frob(int i)=0; };
// Calls f->frob(i) for i = 0 .. n-1; the loop body never runs when n <= 0.
// `f` is not reassigned inside the loop, so every iteration dispatches
// through the same pointer.
void frobN(Foo* f, int n) {
   for (int i=0; i < n; i++)
      f->frob(i);   // virtual call: the dispatch the report wants hoisted
}
====================

This compiles at -O2 to substantially the same x86-64 assembly code with
gcc-4.9, gcc-5.2 and gcc-6.2:

# frobN(Foo* f, int n) as emitted by the compiler (AT&T syntax).
# System V AMD64 argument registers: f arrives in %rdi, n in %esi.
# Register roles inside the loop: %rbp = f, %r12d = n, %ebx = i.
_Z5frobNP3Fooi:
        testl   %esi, %esi
        jle     .L10                    # n <= 0: return without touching the stack
        pushq   %r12
        movl    %esi, %r12d             # keep n across the calls
        pushq   %rbp
        movq    %rdi, %rbp              # keep f across the calls
        pushq   %rbx
        xorl    %ebx, %ebx              # i = 0
.L5:
        movq    0(%rbp), %rax           # load the vtable pointer from *f -- repeated every iteration
        movl    %ebx, %esi              # second argument = i
        addl    $1, %ebx                # i++
        movq    %rbp, %rdi              # first argument = f (this)
        call    *(%rax)                 # load the first vtable slot and call it -- second load, also per iteration
        cmpl    %ebx, %r12d
        jne     .L5                     # loop until i == n
        popq    %rbx
        popq    %rbp
        popq    %r12
.L10:
        rep ret

I would have expected to see something more like this (obtained using the bound
member function extension):

# frobN(Foo* f, int n) with the call target resolved once, before the loop
# (AT&T syntax; System V AMD64: f in %rdi, n in %esi).
# Register roles inside the loop: %r12 = f, %ebp = n, %ebx = i, %r13 = call target.
_Z5frobNP3Fooi:
.LFB12:
        pushq   %r13
        pushq   %r12
        pushq   %rbp
        pushq   %rbx
        subq    $8, %rsp                # return address + 4 pushes = 40 bytes; pad so %rsp is 16-byte aligned at the call
        movq    (%rdi), %rax            # vtable pointer, loaded once
        testl   %esi, %esi
        movq    (%rax), %r13            # first vtable slot, loaded once; note both loads run even when n <= 0
        jle     .L1                     # n <= 0: skip the loop
        movq    %rdi, %r12              # keep f across the calls
        movl    %esi, %ebp              # keep n across the calls
        xorl    %ebx, %ebx              # i = 0
.L5:
        movl    %ebx, %esi              # second argument = i
        addl    $1, %ebx                # i++
        movq    %r12, %rdi              # first argument = f (this)
        call    *%r13                   # indirect call through a register: no memory loads in the loop
        cmpl    %ebx, %ebp
        jne     .L5                     # loop until i == n
.L1:
        addq    $8, %rsp
        popq    %rbx
        popq    %rbp
        popq    %r12
        popq    %r13
        ret

Altering the test case to trigger speculative devirtualization as follows:

==== bug2.cpp =======
#include <cstdio>
// Abstract interface: frob() is pure virtual, so calls made through a Foo*
// are dispatched on the dynamic type of the object.
struct Foo { virtual void frob(int i)=0; };
// Calls f->frob(i) for i = 0 .. n-1; the loop body never runs when n <= 0.
void frobN(Foo* f, int n)
{
   for (int i=0; i < n; i++)
      f->frob(i);   // virtual call: the dispatch the report wants hoisted
}
// The only Foo implementation in this translation unit.
struct Bar : Foo { 
   // Prints "Bar:<i>" followed by a newline.
   void frob(int i) { printf("Bar:%d\n", i); }
};
// Drives frobN with a Bar object for i = 0 .. 9.
int main()
{
   Bar b;
   frobN(&b, 10);
}
=====================

Shows that even the speculative devirtualization is stuck inside the loop body:

# frobN(Foo* f, int n) with speculative devirtualization, as emitted by the
# compiler (AT&T syntax; System V AMD64: f in %rdi, n in %esi).
# Register roles inside the loop: %rbp = f, %r12d = n, %ebx = i.
_Z5frobNP3Fooi:
        testl   %esi, %esi
        jle     .L13                    # n <= 0: return without touching the stack
        pushq   %r12
        movl    %esi, %r12d             # keep n across the calls
        pushq   %rbp
        movq    %rdi, %rbp              # keep f across the calls
        pushq   %rbx
        xorl    %ebx, %ebx              # i = 0
        jmp     .L8                     # enter the loop at the dispatch check
.L16:
        # Fast path: the target was Bar::frob, whose body is inlined here.
        # %esi already holds i (set at .L8 before branching here).
        xorl    %eax, %eax              # %al = 0: no vector-register arguments for the variadic call
        movl    $.LC0, %edi             # presumably the "Bar:%d\n" format string; .LC0 is not shown in this listing
        addl    $1, %ebx                # i++
        call    printf
        cmpl    %ebx, %r12d
        je      .L15                    # i == n: done; otherwise fall through and re-check the target
.L8:
        movq    0(%rbp), %rax           # load the vtable pointer from *f -- repeated every iteration
        movl    %ebx, %esi              # second argument = i (used by both paths)
        movq    (%rax), %rax            # load the first vtable slot -- repeated every iteration
        cmpq    $_ZN3Bar4frobEi, %rax   # is the target Bar::frob(int)?
        je      .L16                    # yes: take the inlined fast path
        addl    $1, %ebx                # i++
        movq    %rbp, %rdi              # first argument = f (this)
        call    *%rax                   # slow path: ordinary indirect call
        cmpl    %ebx, %r12d
        jne     .L8                     # loop until i == n
.L15:
        popq    %rbx
        popq    %rbp
        popq    %r12
.L13:
        rep ret

If the vtable lookup could be hoisted, the speculative devirtualization could
become very powerful by replicating the loop, something like this:

# Sketch of the desired code for frobN(Foo* f, int n): the vtable lookup and the
# speculative-devirtualization test are hoisted, and the loop is replicated into
# a generic copy (.L5) and a copy specialized for Bar::frob (.L16).
# AT&T syntax; System V AMD64: f arrives in %rdi, n in %esi.
# Register roles once set up: %rbp = f, %r12d = n, %ebx = i, %r13 = call target.
_Z5frobNP3Fooi:
        testl   %esi, %esi
        jle     .L10                    # n <= 0: nothing has been pushed, so return directly
        pushq   %r13                    # %r13 is callee-saved and will hold the hoisted target
        pushq   %r12
        movl    %esi, %r12d             # keep n across the calls
        pushq   %rbp
        movq    %rdi, %rbp              # keep f across the calls
        pushq   %rbx
        xorl    %ebx, %ebx              # i = 0
        subq    $8, %rsp                # return address + 4 pushes = 40 bytes; pad so %rsp is 16-byte aligned at the calls
        movq    0(%rbp), %rax           # vtable pointer, loaded once
        movq    (%rax), %r13            # first vtable slot, loaded once
        cmpq    $_ZN3Bar4frobEi, %r13   # is the target Bar::frob(int)? Tested once, outside the loops
        je      .L16
.L5:
        # Generic loop: indirect call through the hoisted target.
        movl    %ebx, %esi              # second argument = i
        addl    $1, %ebx                # i++
        movq    %rbp, %rdi              # first argument = f (this)
        call    *%r13
        cmpl    %ebx, %r12d
        jne     .L5                     # loop until i == n
        jmp     .L15                    # both loops share one epilogue so every push is undone
.L16:
        # Specialized loop: body of Bar::frob inlined.
        xorl    %eax, %eax              # %al = 0: no vector-register arguments for the variadic call
        movl    $.LC0, %edi
        movl    %ebx, %esi              # second argument = i
        addl    $1, %ebx                # i++
        call    printf
        cmpl    %ebx, %r12d
        jne     .L16                    # loop until i == n
.L15:
        addq    $8, %rsp                # drop the alignment padding
        popq    %rbx                    # restore in reverse push order
        popq    %rbp
        popq    %r12
        popq    %r13
.L10:
        rep ret

Reply via email to