Right, but the improvement is only small: because the code now needs more registers, it generates push/pop stack operations.
At 2015-02-03 14:28:43,[email protected] wrote: ># HG changeset patch ># User Praveen Tiwari ># Date 1422944898 -19800 ># Node ID 059892f65db3e4c70017241ea847717e11be0124 ># Parent b0be54fb45cf1cbd3bf5a0543da34fb1a854b25f >blockcopy_pp[4x16:4x32], sse2 asm code optimization > >improved, 222.93c -> 206.77c > 409.49c -> 392.13c > >diff -r b0be54fb45cf -r 059892f65db3 source/common/x86/blockcopy8.asm >--- a/source/common/x86/blockcopy8.asm Tue Feb 03 11:25:35 2015 +0530 >+++ b/source/common/x86/blockcopy8.asm Tue Feb 03 11:58:18 2015 +0530 >@@ -181,37 +181,36 @@ > ;----------------------------------------------------------------------------- > %macro BLOCKCOPY_PP_W4_H8 2 > INIT_XMM sse2 >-cglobal blockcopy_pp_%1x%2, 4, 5, 4 >+cglobal blockcopy_pp_%1x%2, 4, 7, 4 > mov r4d, %2/8 >+ lea r5, [3 * r1] >+ lea r6, [3 * r3] >+ > .loop: > movd m0, [r2] > movd m1, [r2 + r3] >- lea r2, [r2 + 2 * r3] >- movd m2, [r2] >- movd m3, [r2 + r3] >- >- movd [r0], m0 >- movd [r0 + r1], m1 >- lea r0, [r0 + 2 * r1] >- movd [r0], m2 >- movd [r0 + r1], m3 >- >- lea r0, [r0 + 2 * r1] >- lea r2, [r2 + 2 * r3] >+ movd m2, [r2 + 2 * r3] >+ movd m3, [r2 + r6] >+ >+ movd [r0], m0 >+ movd [r0 + r1], m1 >+ movd [r0 + 2 * r1], m2 >+ movd [r0 + r5], m3 >+ >+ lea r2, [r2 + 4 * r3] > movd m0, [r2] > movd m1, [r2 + r3] >- lea r2, [r2 + 2 * r3] >- movd m2, [r2] >- movd m3, [r2 + r3] >- >- movd [r0], m0 >- movd [r0 + r1], m1 >- lea r0, [r0 + 2 * r1] >- movd [r0], m2 >- movd [r0 + r1], m3 >- >- lea r0, [r0 + 2 * r1] >- lea r2, [r2 + 2 * r3] >+ movd m2, [r2 + 2 * r3] >+ movd m3, [r2 + r6] >+ >+ lea r0, [r0 + 4 * r1] >+ movd [r0], m0 >+ movd [r0 + r1], m1 >+ movd [r0 + 2 * r1], m2 >+ movd [r0 + r5], m3 >+ >+ lea r0, [r0 + 4 * r1] >+ lea r2, [r2 + 4 * r3] > > dec r4d > jnz .loop >@@ -219,7 +218,6 @@ > %endmacro > > BLOCKCOPY_PP_W4_H8 4, 16 >- > BLOCKCOPY_PP_W4_H8 4, 32 > > ;----------------------------------------------------------------------------- >_______________________________________________ >x265-devel 
mailing list >[email protected] >https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
