# HG changeset patch
# User Jayashree
# Date 1514549317 -19800
#      Fri Dec 29 17:38:37 2017 +0530
# Node ID 47fd272d3c7002b5a84067a818ca4ae1c61276c1
# Parent  74965520283a92095a542ba1997798d6b3af7281
x86:AVX512 intra_pred_ang32 mode 26 for high bit depth

Primitive           | AVX2 performance | AVX512 performance
-------------------------------------------------------------
intra_ang_32x32[26] | 2.31x            | 4.38x

diff -r 74965520283a -r 47fd272d3c70 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Dec 27 14:51:40 2017 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Dec 29 17:38:37 2017 +0530
@@ -3093,6 +3093,8 @@
         p.cu[BLOCK_32x32].intra_pred[34] = PFX(intra_pred_ang32_2_avx512);
         p.cu[BLOCK_32x32].intra_pred[10] = PFX(intra_pred_ang32_10_avx512);
         p.cu[BLOCK_32x32].intra_pred[18] = PFX(intra_pred_ang32_18_avx512);
+        p.cu[BLOCK_32x32].intra_pred[26] = PFX(intra_pred_ang32_26_avx512);
+
         p.pu[LUMA_64x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>;
         p.pu[LUMA_64x48].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>;
         p.pu[LUMA_64x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x32>;
diff -r 74965520283a -r 47fd272d3c70 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Wed Dec 27 14:51:40 2017 +0530
+++ b/source/common/x86/intrapred16.asm	Fri Dec 29 17:38:37 2017 +0530
@@ -18594,9 +18594,28 @@
     palignr         m4, m2, m0, 2
     movu            [r0 + r3], m4
     mov             rsp, [rsp+4*(mmsize/2)]
-
-    RET
-
+    RET
+INIT_ZMM avx512
+; Fills 32 rows starting at r0 with the same ZMM vector loaded from [r2 + 2].
+; NOTE(review): presumably r0 = dst, r1 = stride in 16-bit pixels, r2 = reference
+; samples -- confirm against the intra_pred function-pointer prototype.
+cglobal intra_pred_ang32_26, 3,3,2
+    movu            m0, [r2 + 2]            ; m0 = the row replicated to every output line
+    add             r1d, r1d                ; r1 *= 2 (presumably pixels -> bytes; TODO confirm)
+    lea             r2, [r1 * 3]            ; r2 is reused as the 3 * r1 row offset
+    ; 7 groups x 4 rows = rows 0..27; the unrolled tail below writes rows 28..31
+%rep 7
+    movu            [r0], m0
+    movu            [r0 + r1], m0
+    movu            [r0 + r1 * 2], m0
+    movu            [r0 + r2], m0
+    lea             r0, [r0 + r1 * 4]       ; step to the next group of four rows
+%endrep
+    movu            [r0], m0                ; last group: no pointer advance afterwards
+    movu            [r0 + r1], m0
+    movu            [r0 + r1 * 2], m0
+    movu            [r0 + r2], m0
+    RET
 ;-------------------------------------------------------------------------------------------------------
 ; avx512 code for intra_pred_ang32 mode 2 to 34 end
 ;-------------------------------------------------------------------------------------------------------
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel