https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67438

            Bug ID: 67438
           Summary: [6 Regression] ~X op ~Y pattern relocation causes loop
                    performance degradation
           Product: gcc
           Version: 6.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: afomin.mailbox at gmail dot com
                CC: izamyatin at gmail dot com, rguenth at gcc dot gnu.org,
                    ysrumyan at gmail dot com
  Target Milestone: ---
            Target: i686

For the loop in the attached test, compiled with -O3 -m32 -march=slm
-ftree-loop-if-convert, we generate 28 insns as of r225249 instead of the
23 insns generated at r225248. (In fact, -march=slm can be omitted; doing so
results in a greater number of insns.) r225249 moves some simplification
patterns from fold-const.c to match.pd, and I've noticed that moving the
~X op ~Y -> Y op X pattern back from match.pd to fold-const.c fixes the problem.

Loop generated at r225248 (23 insns):
movzbl (%ebx),%ecx
add    $0x3,%ebx
movzbl -0x2(%ebx),%edx
not    %ecx
movzbl -0x1(%ebx),%eax
not    %edx
mov    %cl,(%esi)
mov    %dl,0x1(%esi)
not    %eax
cmp    %al,%cl
mov    %eax,%edi
mov    %al,0x2(%esi)
mov    %eax,%ebp
cmovle %ecx,%edi
cmp    %al,%dl
cmovle %edx,%ebp
add    $0x4,%esi
cmp    %dl,%cl
mov    %ebp,%eax
cmovl  %edi,%eax
cmp    (%esp),%ebx
mov    %al,-0x1(%esi)
jne    30 <foo+0x30>

Loop generated at r225249 (28 insns):
movzbl (%edi),%eax
add    $0x3,%edi
movzbl -0x2(%edi),%edx
mov    %al,0x2(%esp)
mov    %eax,%ebx
movzbl -0x1(%edi),%eax
not    %ebx
mov    %dl,0x3(%esp)
mov    %edx,%ecx
mov    %bl,0x0(%ebp)
not    %ecx
mov    %cl,0x1(%ebp)
not    %eax
cmp    %al,%bl
mov    %eax,%esi
mov    %al,0x2(%ebp)
cmovle %ebx,%esi
cmp    %al,%cl
mov    %esi,%edx
mov    %eax,%esi
cmovle %ecx,%esi
add    $0x4,%ebp
movzbl 0x3(%esp),%ecx
cmp    %cl,0x2(%esp)
cmovle %esi,%edx
cmp    0x4(%esp),%edi
mov    %dl,-0x1(%ebp)
jne    30 <foo+0x30>

Reply via email to