http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53034

--- Comment #1 from Steven Bosscher <steven at gcc dot gnu.org> 2012-04-18 
20:25:45 UTC ---
The gimple switch conversion pass is much too aggressive: worse code is
generated for the very examples that were used to introduce the implementation
of switch statements with bit tests
(see http://gcc.gnu.org/ml/gcc-patches/2003-01/msg01733.html).

For example, "GCC: (GNU) 4.5.0 20091228 (experimental) [trunk revision 155486]"
applied to the first test case:

$ cat t.c
int foo(int x)
{
  switch (x)
    {
    case 4:
    case 6:
    case 9:
    case 11:
      return 30;
    }
  return 31;
}

$ gcc-4.5.0 -S -Os -fdump-tree-optimized -fdump-rtl-expand-slim t.c -m32
-fno-tree-switch-conversion && cat t.s
    .file    "t.c"
    .text
.globl foo
    .type    foo, @function
foo:
    pushl    %ebp
    movl    %esp, %ebp
    movl    8(%ebp), %ecx
    cmpl    $11, %ecx
    ja    .L4
    movl    $1, %edx
    movl    $30, %eax
    sall    %cl, %edx
    andl    $2640, %edx
    jne    .L2
.L4:
    movl    $31, %eax
.L2:
    popl    %ebp
    ret
    .size    foo, .-foo
    .ident    "GCC: (GNU) 4.5.0 20091228 (experimental) [trunk revision 155486]"
    .section    .note.GNU-stack,"",@progbits
$ gcc-4.5.0 -S -Os -fdump-tree-optimized -fdump-rtl-expand-slim t.c -m32 && cat
t.s 
    .file    "t.c"
    .text
.globl foo
    .type    foo, @function
foo:
    pushl    %ebp
    movl    $31, %eax
    movl    %esp, %ebp
    movl    8(%ebp), %edx
    subl    $4, %edx
    cmpl    $7, %edx
    ja    .L2
    movl    CSWTCH.1(,%edx,4), %eax
.L2:
    popl    %ebp
    ret
    .size    foo, .-foo
    .section    .rodata
    .align 4
    .type    CSWTCH.1, @object
    .size    CSWTCH.1, 32
CSWTCH.1:
    .long    30
    .long    31
    .long    30
    .long    31
    .long    31
    .long    30
    .long    31
    .long    30
    .ident    "GCC: (GNU) 4.5.0 20091228 (experimental) [trunk revision 155486]"
    .section    .note.GNU-stack,"",@progbits

Without switch conversion, there are 13 instructions including 2 forward
branches, no loads from memory other than the stack argument, and no rodata.
With switch conversion there are 10 instructions including one additional load
(from the CSWTCH.1 table) and one branch, plus a 32-byte table in .rodata. So
despite the lower instruction count, size takes a penalty, and for throughput
the load may be more expensive than the branch (for larger test cases anyway).

$ gcc-4.5.0 -c -Os -fdump-tree-optimized -fdump-rtl-expand-slim t.c
-fno-tree-switch-conversion  -m32 && size t.o
   text       data        bss        dec        hex    filename
     38          0          0         38         26    t.o
$ gcc-4.5.0 -c -Os -fdump-tree-optimized -fdump-rtl-expand-slim t.c
-ftree-switch-conversion  -m32 && size t.o
   text       data        bss        dec        hex    filename
     60          0          0         60         3c    t.o



For the second test case:

$ cat t.c
int bar(int x)
{
  switch (x)
    {
    case '0':  case '1':  case '2':  case '3':  case '4':
    case '5':  case '6':  case '7':  case '8':  case '9':
    case 'A':  case 'B':  case 'C':  case 'D':  case 'E':
    case 'F':
      return 1;
    }
  return 0;
}

$ gcc-4.5.0 -c -Os -fdump-tree-optimized -fdump-rtl-expand-slim t.c
-ftree-switch-conversion  -m32 && size t.o
   text       data        bss        dec        hex    filename
    117          0          0        117         75    t.o
$ gcc-4.5.0 -c -Os -fdump-tree-optimized -fdump-rtl-expand-slim t.c
-fno-tree-switch-conversion  -m32 && size t.o
   text       data        bss        dec        hex    filename
     38          0          0         38         26    t.o
$ gcc-4.5.0 -c -Os -fdump-tree-optimized -fdump-rtl-expand-slim t.c
-ftree-switch-conversion  -m64 && size t.o
   text       data        bss        dec        hex    filename
    161          0          0        161         a1    t.o
$ gcc-4.5.0 -c -Os -fdump-tree-optimized -fdump-rtl-expand-slim t.c
-fno-tree-switch-conversion  -m64 && size t.o
   text       data        bss        dec        hex    filename
     80          0          0         80         50    t.o

Without switch conversion (-m64):

    .file    "t.c"
    .text
.globl bar
    .type    bar, @function
bar:
.LFB0:
    .cfi_startproc
    leal    -48(%rdi), %ecx
    cmpl    $22, %ecx
    ja    .L4
    movl    $1, %edx
    movl    $1, %eax
    salq    %cl, %rdx
    testl    $8258559, %edx
    jne    .L2
.L4:
    xorl    %eax, %eax
.L2:
    ret
    .cfi_endproc
.LFE0:
    .size    bar, .-bar
    .ident    "GCC: (GNU) 4.5.0 20091228 (experimental) [trunk revision 155486]"
    .section    .note.GNU-stack,"",@progbits


With switch conversion enabled (-m64):
    .file    "t.c"
    .text
.globl bar
    .type    bar, @function
bar:
.LFB0:
    .cfi_startproc
    subl    $48, %edi
    xorl    %eax, %eax
    cmpl    $22, %edi
    ja    .L2
    movslq    %edi, %rdi
    movl    CSWTCH.1(,%rdi,4), %eax
.L2:
    ret
    .cfi_endproc
.LFE0:
    .size    bar, .-bar
    .section    .rodata
    .align 16
    .type    CSWTCH.1, @object
    .size    CSWTCH.1, 92
CSWTCH.1:
    .long    1
    .long    1
    .long    1
    .long    1
    .long    1
    .long    1
    .long    1
    .long    1
    .long    1
    .long    1
    .long    0
    .long    0
    .long    0
    .long    0
    .long    0
    .long    0
    .long    0
    .long    1
    .long    1
    .long    1
    .long    1
    .long    1
    .long    1
    .ident    "GCC: (GNU) 4.5.0 20091228 (experimental) [trunk revision 155486]"
    .section    .note.GNU-stack,"",@progbits

Reply via email to