# HG changeset patch # User David T Yuen <dtyx...@gmail.com> # Date 1432085346 25200 # Node ID e096c40ce8ff9c170bdb8caa094f53b30ebd7db7 # Parent 3e07cba4b2034db2b819b2e11e98ee4b851d52b5 asm: interp_4tap_vert_pX_4xN sse2
Improved register usage for addressing of output. This improves 64-bit performance by 0.7% to 2.5%. Also added interp_4tap_vert_ps_4x32 to the primitives setup. diff -r 3e07cba4b203 -r e096c40ce8ff source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Tue May 19 14:27:04 2015 -0700 +++ b/source/common/x86/asm-primitives.cpp Tue May 19 18:29:06 2015 -0700 @@ -1482,6 +1482,7 @@ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_vps = x265_interp_4tap_vert_ps_4x4_sse2; p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].filter_vps = x265_interp_4tap_vert_ps_4x8_sse2; p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].filter_vps = x265_interp_4tap_vert_ps_4x16_sse2; + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].filter_vps = x265_interp_4tap_vert_ps_4x32_sse2; p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_vps = x265_interp_4tap_vert_ps_4x4_sse2; p.chroma[X265_CSP_I444].pu[LUMA_4x8].filter_vps = x265_interp_4tap_vert_ps_4x8_sse2; p.chroma[X265_CSP_I444].pu[LUMA_4x16].filter_vps = x265_interp_4tap_vert_ps_4x16_sse2; diff -r 3e07cba4b203 -r e096c40ce8ff source/common/x86/ipfilter8.asm --- a/source/common/x86/ipfilter8.asm Tue May 19 14:27:04 2015 -0700 +++ b/source/common/x86/ipfilter8.asm Tue May 19 18:29:06 2015 -0700 @@ -1139,6 +1139,7 @@ %endif lea r5, [3 * r1] + lea r4, [3 * r3] punpcklqdq m0, m0 %assign x 1 @@ -1243,11 +1244,10 @@ movd [r2], m2 psrldq m2, 4 movd [r2 + r3], m2 - lea r2, [r2 + 2 * r3] psrldq m2, 4 - movd [r2], m2 + movd [r2 + 2 * r3], m2 psrldq m2, 4 - movd [r2 + r3], m2 + movd [r2 + r4], m2 %elifidn %1,ps psrldq m4, 2 psrldq m5, 2 @@ -1255,13 +1255,12 @@ pshufd m5, m5, q3120 punpcklqdq m4, m5 psubw m4, m1 - lea r2, [r2 + 2 * r3] - movh [r2], m4 - movhps [r2 + r3], m4 + movh [r2 + 2 * r3], m4 + movhps [r2 + r4], m4 %endif %if x < %2/4 - lea r2, [r2 + 2 * r3] + lea r2, [r2 + 4 * r3] %endif %assign x x+1 _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel