Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> > On Nov 13, 2017, at 8:03 PM, Tim Rowley <timothy.o.row...@intel.com> wrote: > > Speed up avx512 platforms; fixes performance regression caused > by switch to simdlib. > > Cc: mesa-sta...@lists.freedesktop.org > --- > .../drivers/swr/rasterizer/common/simdlib_512_avx512.inl | 12 +----------- > 1 file changed, 1 insertion(+), 11 deletions(-) > > diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl > b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl > index 95e4c31909..c13b9f616a 100644 > --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl > +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl > @@ -484,17 +484,7 @@ SIMD_WRAPPER_2(unpacklo_ps); > template<ScaleFactor ScaleT> > static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // > return *(float*)(((int8*)p) + (idx * ScaleT)) > { > - uint32_t *pOffsets = (uint32_t*)&idx; > - Float vResult; > - float* pResult = (float*)&vResult; > - for (uint32_t i = 0; i < SIMD_WIDTH; ++i) > - { > - uint32_t offset = pOffsets[i]; > - offset = offset * static_cast<uint32_t>(ScaleT); > - pResult[i] = *(float const*)(((uint8_t const*)p + offset)); > - } > - > - return vResult; > + return _mm512_i32gather_ps(idx, p, static_cast<int>(ScaleT)); > } > > static SIMDINLINE Float SIMDCALL load1_ps(float const *p) // return *p > (broadcast 1 value to all elements) > -- > 2.14.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev