This version is good.
At 2015-02-02 19:34:13,[email protected] wrote:
># HG changeset patch
># User Praveen Tiwari
># Date 1422876820 -19800
># Branch stable
># Node ID 8e1f8ca9d4112d8ad9801bf79518482306ff55ce
># Parent bc0fbae84481a82011ce7548efe67210bb14666f
>blockfill_s_8x8 sse2 asm code optimization
>
>improved, 100.04c -> 90.05c
>
>diff -r bc0fbae84481 -r 8e1f8ca9d411 source/common/x86/blockcopy8.asm
>--- a/source/common/x86/blockcopy8.asm Mon Feb 02 14:34:16 2015 +0530
>+++ b/source/common/x86/blockcopy8.asm Mon Feb 02 17:03:40 2015 +0530
>@@ -1748,9 +1748,10 @@
> ; void blockfill_s_8x8(int16_t* dst, intptr_t dstride, int16_t val)
> ;-----------------------------------------------------------------------------
> INIT_XMM sse2
>-cglobal blockfill_s_8x8, 3, 3, 1, dst, dstStride, val
>+cglobal blockfill_s_8x8, 3, 4, 1, dst, dstStride, val
>
> add r1, r1
>+lea r3, [3 * r1]
>
> movd m0, r2d
> pshuflw m0, m0, 0
>@@ -1760,17 +1761,13 @@
> movu [r0 + r1], m0
> movu [r0 + 2 * r1], m0
>
>-lea r0, [r0 + 2 * r1]
>+movu [r0 + r3], m0
>+
>+lea r0, [r0 + 4 * r1]
>+movu [r0], m0
> movu [r0 + r1], m0
> movu [r0 + 2 * r1], m0
>-
>-lea r0, [r0 + 2 * r1]
>-movu [r0 + r1], m0
>-movu [r0 + 2 * r1], m0
>-
>-lea r0, [r0 + 2 * r1]
>-movu [r0 + r1], m0
>-
>+movu [r0 + r3], m0
> RET
>
> ;-----------------------------------------------------------------------------
>_______________________________________________
>x265-devel mailing list
>[email protected]
>https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
