https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103797

            Bug ID: 103797
           Summary: Clang vectorized LightPixel while GCC does not
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hubicka at gcc dot gnu.org
  Target Milestone: ---

Clang vectorises divss in LightPixel while GCC does not (at -O3).  This seems
to account for a 17% difference in the rasterflood_svg benchmark of Firefox.
The perf annotate output for the Clang-compiled function (note the divps):

       │    0000000001864660 <mozilla::gfx::(anonymous namespace)::SpecularLightingSoftware::LightPixel(mozilla::gfx::Point3DTyped<mozilla::gfx::UnknownUnits, float> const&, mozilla::gfx::Point3DTyped<mozilla::gfx::UnknownUnits, float> const&, unsigned int)>:
       │    mozilla::gfx::(anonymous namespace)::SpecularLightingSoftware::LightPixel(mozilla::gfx::Point3DTyped<mozilla::gfx::UnknownUnits, float> const&, mozilla::gfx::Point3DTyped<mozilla::gfx::UnknownUnits, float> const&, unsigned int):
  0.05 │      push      %rbp
  0.07 │      mov       %rsp,%rbp
  0.71 │      xorps     %xmm6,%xmm6
  0.32 │      addss     %xmm6,%xmm4
       │      unpcklps  %xmm3,%xmm5
  0.78 │      movss     anon.5bcbce9b5eeaaf1a18a99b9a5b62e1ce.3.llvm.5306652999446557335+0x6d8,%xmm8
  0.01 │      addps     %xmm8,%xmm5
  1.47 │      movaps    %xmm4,%xmm9
       │      mulss     %xmm4,%xmm9
       │      movaps    %xmm5,%xmm7
  0.01 │      mulps     %xmm5,%xmm7
  3.35 │      movaps    %xmm7,%xmm3
       │      shufps    $0x55,%xmm7,%xmm3
  0.99 │      addss     %xmm9,%xmm3
  1.59 │      addss     %xmm7,%xmm3
  2.01 │      sqrtss    %xmm3,%xmm3
 11.43 │      divss     %xmm3,%xmm4
  6.76 │      shufps    $0x0,%xmm3,%xmm3
  0.01 │      divps     %xmm3,%xmm5
  2.58 │      mulss     %xmm1,%xmm4
  0.04 │      unpcklps  %xmm0,%xmm2
       │      mulps     %xmm5,%xmm2
  2.67 │      movaps    %xmm2,%xmm0
  0.04 │      shufps    $0x55,%xmm2,%xmm0
  2.11 │      addss     %xmm4,%xmm0
  1.87 │      addss     %xmm2,%xmm0
  2.82 │      cmpless   %xmm0,%xmm6
  2.20 │      andps     %xmm8,%xmm6
  1.05 │      mulss     %xmm0,%xmm6
  4.04 │      mulss     .str.6.llvm.231702015065810902+0x77,%xmm6
  3.14 │      cvttss2si %xmm6,%eax
  4.45 │      mov       0x8(%rdi),%ecx
  0.00 │      mov       0xc(%rdi),%edx
       │      movzwl    %ax,%eax
  1.10 │      test      %edx,%edx
       │    ↓ jle       92
       │88:   imul      %eax,%eax
  9.06 │      shr       $0xf,%eax
  3.12 │      dec       %edx
       │    ↑ jne       88
       │92:   shr       $0x8,%eax
  1.95 │      movzwl    0x10(%rdi,%rax,2),%eax
  6.48 │      imul      %eax,%ecx
  0.99 │      shr       $0x8,%ecx
  1.06 │      mov       %esi,%eax
  0.01 │      shr       $0x8,%eax
       │      mov       %esi,%edx
       │      shr       $0x10,%edx
  0.01 │      mov       $0xff,%edi
       │      and       %edi,%esi
  0.01 │      imul      %ecx,%esi
  3.32 │      shr       $0xf,%esi
  1.81 │      cmp       %edi,%esi
  0.04 │      cmovae    %edi,%esi
  1.99 │      and       %edi,%eax
  0.01 │      imul      %ecx,%eax
       │      shr       $0xf,%eax
  0.01 │      cmp       %edi,%eax
  0.28 │      cmovae    %edi,%eax
  0.96 │      and       %edi,%edx
       │      imul      %ecx,%edx
       │      shr       $0xf,%edx
  0.92 │      cmp       %edi,%edx
  0.85 │      cmovae    %edi,%edx
  1.00 │      cmp       %eax,%edx
  1.20 │      mov       %eax,%ecx
       │      cmova     %edx,%ecx
  2.17 │      cmp       %esi,%ecx
  1.15 │      cmovbe    %esi,%ecx
  1.79 │      shl       $0x18,%ecx
  1.17 │      shl       $0x10,%edx
       │      shl       $0x8,%eax
  0.03 │      or        %edx,%eax
  0.01 │      or        %esi,%eax
  0.14 │      or        %ecx,%eax
  0.72 │      pop       %rbp
  0.04 │    ← ret

Reply via email to