# HG changeset patch
# User Praveen Tiwari <prav...@multicorewareinc.com>
# Date 1512016234 28800
#      Wed Nov 29 20:30:34 2017 -0800
# Node ID 63bedd49719fe9094ffdcbb88ac8512dccc120d2
# Parent 2a79f5eb0a9897f8703dafadfa034ba68b5955a9
x86: AVX512 idct16 kernel - optimize to use aligned loads
diff -r 2a79f5eb0a98 -r 63bedd49719f source/common/x86/dct8.asm --- a/source/common/x86/dct8.asm Wed Nov 29 19:43:31 2017 -0800 +++ b/source/common/x86/dct8.asm Wed Nov 29 20:30:34 2017 -0800 @@ -4761,7 +4761,7 @@ paddd m10, m17 vmovdqu32 m9 {k2}, m10 - movu m5, [tab_AVX512_idct16_1 + %1 * 64] + mova m5, [tab_AVX512_idct16_1 + %1 * 64] pmaddwd m10, m1, m5 pmaddwd m11, m3, m5 @@ -4794,7 +4794,7 @@ paddd m9, m14 psrad m9, IDCT_SHIFT1 - movu m5, [tab_AVX512_idct16_2 + %1 * 64 + 64] + mova m5, [tab_AVX512_idct16_2 + %1 * 64 + 64] pmaddwd m10, m0, m5 pmaddwd m12, m7, m5 @@ -4824,7 +4824,7 @@ - movu m5, [tab_AVX512_idct16_1 + %1 * 64 + 64] + mova m5, [tab_AVX512_idct16_1 + %1 * 64 + 64] pmaddwd m12, m1, m5 pmaddwd m13, m3, m5 @@ -4864,8 +4864,8 @@ packssdw m11, m5 packssdw m9, m10 - movu m10, [idct16_AVX512_shuff] - movu m5, [idct16_AVX512_shuff1] + mova m10, [idct16_AVX512_shuff] + mova m5, [idct16_AVX512_shuff1] vpermd m%2, m10, m11 vpermd m%3, m5, m9 @@ -5232,20 +5232,20 @@ IDCT16_AVX512_PASS1 0, 22, 23 IDCT16_AVX512_PASS1 2, 24, 25 - movu m26, [idct16_AVX512_shuff2] - movu m27, [idct16_AVX512_shuff3] + mova m26, [idct16_AVX512_shuff2] + mova m27, [idct16_AVX512_shuff3] vpermi2q m26, m18, m22 vpermi2q m27, m18, m22 - movu m18, [idct16_AVX512_shuff2] - movu m22, [idct16_AVX512_shuff3] + mova m18, [idct16_AVX512_shuff2] + mova m22, [idct16_AVX512_shuff3] vpermi2q m18, m20, m24 vpermi2q m22, m20, m24 - movu m20, [idct16_AVX512_shuff4] - movu m24, [idct16_AVX512_shuff5] + mova m20, [idct16_AVX512_shuff4] + mova m24, [idct16_AVX512_shuff5] vpermi2q m20, m21, m25 vpermi2q m24, m21, m25 - movu m21, [idct16_AVX512_shuff4] - movu m25, [idct16_AVX512_shuff5] + mova m21, [idct16_AVX512_shuff4] + mova m25, [idct16_AVX512_shuff5] vpermi2q m21, m19, m23 vpermi2q m25, m19, m23 _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel