https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92825
Bug ID: 92825 Summary: Unnecessary stack protection and missed SLP vectorization in Firefox's LightPixel. Product: gcc Version: 10.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: tree-optimization Assignee: unassigned at gcc dot gnu.org Reporter: hubicka at gcc dot gnu.org Target Milestone: --- Created attachment 47428 --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=47428&action=edit full testcase uint32_t DiffuseLightingSoftware::LightPixel(const Point3D& aNormal, const Point3D& aVectorToLight, uint32_t aColor) { Float dotNL = std::max(0.0f, aNormal.DotProduct(aVectorToLight)); Float diffuseNL = mDiffuseConstant * dotNL; union { uint32_t bgra; uint8_t components[4]; } color = {aColor}; color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_B] = umin( uint32_t(diffuseNL * color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_B]), 255U); color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_G] = umin( uint32_t(diffuseNL * color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_G]), 255U); color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_R] = umin( uint32_t(diffuseNL * color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_R]), 255U); color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_A] = 255; return color.bgra; } (full testcase attached) Building with -O3 -fstack-protector-strong results in slower code with gcc10 than with gcc9 or clang. 
GCC produces: │ 0000000004390e20 <mozilla::gfx::(anonymous namespace)::SpecularLightingSoftware::LightPixel(mozilla::gfx::Point3DTyped<mozilla::gfx::UnknownUnits, float> const&, │ _ZN7mozilla3gfx12_GLOBAL__N_124SpecularLightingSoftware10LightPixelERKNS0_12Point3DTypedINS0_12UnknownUnitsEfEES7_j(): 0.19 │ push %rbp 0.60 │ pxor %xmm5,%xmm5 0.05 │ mov %rsp,%rbp 0.12 │ push %rbx 0.65 │ sub $0x18,%rsp 0.33 │ movss 0x4(%rdx),%xmm0 0.10 │ movss (%rdx),%xmm1 0.58 │ mov %fs:0x28,%rax 0.03 │ mov %rax,-0x18(%rbp) 0.22 │ xor %eax,%eax 0.07 │ movss pw_32+0x1588,%xmm3 1.58 │ addss 0x8(%rdx),%xmm3 0.67 │ addss %xmm5,%xmm0 0.23 │ addss %xmm5,%xmm1 │ movaps %xmm0,%xmm2 0.41 │ movaps %xmm1,%xmm4 0.87 │ mulss %xmm0,%xmm2 0.28 │ mulss %xmm1,%xmm4 3.71 │ addss %xmm2,%xmm4 0.14 │ movaps %xmm3,%xmm2 0.04 │ mulss %xmm3,%xmm2 1.99 │ addss %xmm2,%xmm4 0.15 │ movss 0x4(%rsi),%xmm2 9.39 │ sqrtss %xmm4,%xmm4 8.90 │ divss %xmm4,%xmm0 2.10 │ divss %xmm4,%xmm3 1.08 │ mulss %xmm0,%xmm2 0.01 │ movss 0x8(%rsi),%xmm0 while clang Percent│ _ZN7mozilla3gfx12_GLOBAL__N_124SpecularLightingSoftware10LightPixelERKNS0_12Point3DTypedINS0_12UnknownUnitsEfEES7_j(): 0.11 │ xorps %xmm0,%xmm0 0.83 │ movss 0x4(%rdx),%xmm1 3.29 │ addss %xmm0,%xmm1 0.03 │ movss (%rdx),%xmm2 0.08 │ movss 0x8(%rdx),%xmm3 0.04 │ unpcklps %xmm2,%xmm3 0.59 │ movss mozilla::gfx::ConvertComponentTransferFunctionToFilter(mozilla::gfx::ComponentTransferAttributes const&, int, int, mozilla::gfx::DrawTarget*, RefPtr<m 1.00 │ addps %xmm2,%xmm3 0.10 │ movaps %xmm3,%xmm4 0.82 │ shufps $0xe5,%xmm3,%xmm4 3.05 │ mulss %xmm4,%xmm4 0.09 │ movaps %xmm1,%xmm5 0.12 │ mulss %xmm1,%xmm5 2.77 │ addss %xmm4,%xmm5 0.06 │ movaps %xmm3,%xmm4 0.00 │ mulss %xmm3,%xmm4 2.95 │ addss %xmm5,%xmm4 9.54 │ sqrtss %xmm4,%xmm4 8.84 │ divss %xmm4,%xmm1 0.08 │ shufps $0xe0,%xmm4,%xmm4 2.45 │ divps %xmm4,%xmm3 0.88 │ mulss 0x4(%rsi),%xmm1 0.01 │ movss (%rsi),%xmm4 │ movss 0x8(%rsi),%xmm5 0.02 │ unpcklps %xmm4,%xmm5 2.82 │ mulps %xmm3,%xmm5 0.03 │ movaps %xmm5,%xmm3 0.88 │ 
shufps $0xe5,%xmm5,%xmm3 3.47 │ addss %xmm1,%xmm3 3.39 │ addss %xmm5,%xmm3 3.09 │ cmpless %xmm3,%xmm0 1.77 │ andps %xmm2,%xmm0 3.01 │ mulss %xmm3,%xmm0 3.25 │ mulss mozIGeckoMediaPluginService::COMTypeInfo<mozIGeckoMediaPluginService, void,%xmm0 4.85 │ cvttss2si %xmm0,%eax