[Bug c/50272] New: A case that PRE optimization hurts performance

jiangning.liu at arm dot com Thu, 01 Sep 2011 22:08:25 -0700

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50272


             Bug #: 50272
           Summary: A case that PRE optimization hurts performance
    Classification: Unclassified
           Product: gcc
           Version: 4.7.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: jiangning....@arm.com


For the following simple test case, PRE optimization hoists computation
(s!=1) into the default branch of the switch statement, and finally causes
very poor code generation. This problem occurs in both X86 and ARM, and I
believe it is also a problem for other targets. 

int f(char *t) {
    int s=0;

    while (*t && s != 1) {
        switch (s) {
        case 0:
            s = 2;
            break;
        case 2:
            s = 1;
            break;
        default:
            if (*t == '-') 
                s = 1;
            break;
        }
        t++;
    }

    return s;
}

Taking X86 as an example, with option "-O2" you may find 52 instructions
generated like below,

00000000 <f>:
   0:    55                       push   %ebp
   1:    31 c0                    xor    %eax,%eax
   3:    89 e5                    mov    %esp,%ebp
   5:    57                       push   %edi
   6:    56                       push   %esi
   7:    53                       push   %ebx
   8:    8b 55 08                 mov    0x8(%ebp),%edx
   b:    0f b6 0a                 movzbl (%edx),%ecx
   e:    84 c9                    test   %cl,%cl
  10:    74 50                    je     62 <f+0x62>
  12:    83 c2 01                 add    $0x1,%edx
  15:    85 c0                    test   %eax,%eax
  17:    75 23                    jne    3c <f+0x3c>
  19:    8d b4 26 00 00 00 00     lea    0x0(%esi,%eiz,1),%esi
  20:    0f b6 0a                 movzbl (%edx),%ecx
  23:    84 c9                    test   %cl,%cl
  25:    0f 95 c0                 setne  %al
  28:    89 c7                    mov    %eax,%edi
  2a:    b8 02 00 00 00           mov    $0x2,%eax
  2f:    89 fb                    mov    %edi,%ebx
  31:    83 c2 01                 add    $0x1,%edx
  34:    84 db                    test   %bl,%bl
  36:    74 2a                    je     62 <f+0x62>
  38:    85 c0                    test   %eax,%eax
  3a:    74 e4                    je     20 <f+0x20>
  3c:    83 f8 02                 cmp    $0x2,%eax
  3f:    74 1f                    je     60 <f+0x60>
  41:    80 f9 2d                 cmp    $0x2d,%cl
  44:    74 22                    je     68 <f+0x68>
  46:    0f b6 0a                 movzbl (%edx),%ecx
  49:    83 f8 01                 cmp    $0x1,%eax
  4c:    0f 95 c3                 setne  %bl
  4f:    89 df                    mov    %ebx,%edi
  51:    84 c9                    test   %cl,%cl
  53:    0f 95 c3                 setne  %bl
  56:    89 de                    mov    %ebx,%esi
  58:    21 f7                    and    %esi,%edi
  5a:    eb d3                    jmp    2f <f+0x2f>
  5c:    8d 74 26 00              lea    0x0(%esi,%eiz,1),%esi
  60:    b0 01                    mov    $0x1,%al
  62:    5b                       pop    %ebx
  63:    5e                       pop    %esi
  64:    5f                       pop    %edi
  65:    5d                       pop    %ebp
  66:    c3                       ret    
  67:    90                       nop
  68:    b8 01 00 00 00           mov    $0x1,%eax
  6d:    5b                       pop    %ebx
  6e:    5e                       pop    %esi
  6f:    5f                       pop    %edi
  70:    5d                       pop    %ebp
  71:    c3                       ret    

But with command line option "-O2 -fno-tree-pre", there are only 12
instructions generated, and the code would be very clean like below,

00000000 <f>:
   0:    55                       push   %ebp
   1:    31 c0                    xor    %eax,%eax
   3:    89 e5                    mov    %esp,%ebp
   5:    8b 55 08                 mov    0x8(%ebp),%edx
   8:    80 3a 00                 cmpb   $0x0,(%edx)
   b:    74 0e                    je     1b <f+0x1b>
   d:    80 7a 01 00              cmpb   $0x0,0x1(%edx)
  11:    b0 02                    mov    $0x2,%al
  13:    ba 01 00 00 00           mov    $0x1,%edx
  18:    0f 45 c2                 cmovne %edx,%eax
  1b:    5d                       pop    %ebp
  1c:    c3                       ret

[Bug c/50272] New: A case that PRE optimization hurts performance

Reply via email to