# HG changeset patch
# User Murugan Vairavel <[email protected]>
# Date 1384168182 -19800
#      Mon Nov 11 16:39:42 2013 +0530
# Node ID 2dfda7d0c819c026f8ad41bced831672a02976f2
# Parent  d0f8cfc0dd3157acfdcc5e32871de568a5354e6a
asm: pixelsub_ps routine for 64xN blocks

diff -r d0f8cfc0dd31 -r 2dfda7d0c819 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Mon Nov 11 15:24:08 2013 +0530
+++ b/source/common/x86/pixel-a.asm	Mon Nov 11 16:39:42 2013 +0530
@@ -6047,3 +6047,157 @@
 %endmacro
 
 PIXELSUB_PS_W48_H2 48, 64
+
+;-----------------------------------------------------------------------------
+; void pixel_sub_ps_%1x%2(int16_t *dest, intptr_t deststride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);
+;
+; Computes dest[x] = src0[x] - src1[x] over a 64-pixel-wide, %2-row block.
+; Source pixels are loaded as bytes and zero-extended to words; the word
+; differences are stored to dest.  Two rows are processed per iteration, so
+; %2 is assumed to be even.
+;
+; Register roles:
+;   r0 = dest, r1 = deststride (doubled below to a byte stride for the word stores)
+;   r2 = src0, r4 = srcstride0
+;   r3 = src1, r5 = srcstride1
+;   r6d = row pairs left
+;   m6 = zero, used to widen the high 8 bytes; m0-m5 = data
+;
+; Scheduling: the load/widen of the next 16 pixels is issued before the
+; subtract/store of the previous 16, so the instruction order is deliberate.
+;-----------------------------------------------------------------------------
+%macro PIXELSUB_PS_W64_H2 2
+INIT_XMM sse4
+cglobal pixel_sub_ps_%1x%2, 6, 7, 7, dest, deststride, src0, src1, srcstride0, srcstride1
+
+    add         r1,     r1                  ; dest elements are words: scale stride to bytes
+    mov         r6d,    %2/2                ; loop counter = number of row pairs
+    pxor        m6,     m6                  ; zero for punpckhbw widening
+
+.loop:
+    ; row 0: px 0-15 subtracted; src0 px 16-31 preloaded into m4/m5
+    movu        m1,     [r2]
+    pmovzxbw    m0,     m1                  ; low 8 bytes  -> words
+    punpckhbw   m1,     m6                  ; high 8 bytes -> words
+    movu        m3,     [r3]
+    pmovzxbw    m2,     m3
+    punpckhbw   m3,     m6
+    movu        m5,     [r2 + 16]
+    pmovzxbw    m4,     m5
+    punpckhbw   m5,     m6
+
+    psubw       m0,     m2
+    psubw       m1,     m3
+
+    movu        [r0],               m0
+    movu        [r0 + 16],          m1
+
+    ; row 0: px 16-31 subtracted; src0 px 32-47 preloaded into m2/m3
+    movu        m1,     [r3 + 16]
+    pmovzxbw    m0,     m1
+    punpckhbw   m1,     m6
+    movu        m3,     [r2 + 32]
+    pmovzxbw    m2,     m3
+    punpckhbw   m3,     m6
+
+    psubw       m4,     m0
+    psubw       m5,     m1
+
+    movu        [r0 + 32],          m4
+    movu        [r0 + 48],          m5
+
+    ; row 0: px 32-47 subtracted; src0 px 48-63 preloaded into m0/m1
+    movu        m5,     [r3 + 32]
+    pmovzxbw    m4,     m5
+    punpckhbw   m5,     m6
+    movu        m1,     [r2 + 48]
+    pmovzxbw    m0,     m1
+    punpckhbw   m1,     m6
+
+    psubw       m2,     m4
+    psubw       m3,     m5
+
+    movu        [r0 + 64],          m2
+    movu        [r0 + 80],          m3
+
+    ; row 0: px 48-63 subtracted; row 1 src0 px 0-15 preloaded into m4/m5
+    movu        m3,     [r3 + 48]
+    pmovzxbw    m2,     m3
+    punpckhbw   m3,     m6
+    movu        m5,     [r2 + r4]
+    pmovzxbw    m4,     m5
+    punpckhbw   m5,     m6
+
+    psubw       m0,     m2
+    psubw       m1,     m3
+
+    movu        [r0 + 96],          m0
+    movu        [r0 + 112],         m1
+
+    ; row 1: px 0-15 subtracted; src0 px 16-31 preloaded into m2/m3
+    movu        m1,     [r3 + r5]
+    pmovzxbw    m0,     m1
+    punpckhbw   m1,     m6
+    movu        m3,     [r2 + r4 + 16]
+    pmovzxbw    m2,     m3
+    punpckhbw   m3,     m6
+
+    psubw       m4,     m0
+    psubw       m5,     m1
+
+    movu        [r0 + r1],          m4
+    movu        [r0 + r1 + 16],     m5
+
+    ; row 1: px 16-31 subtracted; src0 px 32-47 preloaded into m0/m1
+    movu        m5,     [r3 + r5 + 16]
+    pmovzxbw    m4,     m5
+    punpckhbw   m5,     m6
+    movu        m1,     [r2 + r4 + 32]
+    pmovzxbw    m0,     m1
+    punpckhbw   m1,     m6
+
+    psubw       m2,     m4
+    psubw       m3,     m5
+
+    movu        [r0 + r1 + 32],     m2
+    movu        [r0 + r1 + 48],     m3
+
+    ; row 1: px 32-47 subtracted; src0 px 48-63 preloaded into m4/m5
+    movu        m3,     [r3 + r5 + 32]
+    pmovzxbw    m2,     m3
+    punpckhbw   m3,     m6
+    movu        m5,     [r2 + r4 + 48]
+    pmovzxbw    m4,     m5
+    punpckhbw   m5,     m6
+
+    psubw       m0,     m2
+    psubw       m1,     m3
+
+    movu        [r0 + r1 + 64],     m0
+    movu        [r0 + r1 + 80],     m1
+
+    ; row 1: px 48-63 subtracted (nothing left to preload)
+    movu        m1,     [r3 + r5 + 48]
+    pmovzxbw    m0,     m1
+    punpckhbw   m1,     m6
+
+    psubw       m4,     m0
+    psubw       m5,     m1
+
+    movu        [r0 + r1 + 96],     m4
+    movu        [r0 + r1 + 112],    m5
+
+    ; advance all three pointers by two rows
+    lea         r2,     [r2 + 2 * r4]
+    lea         r3,     [r3 + 2 * r5]
+    lea         r0,     [r0 + 2 * r1]
+
+    dec         r6d
+    jnz         .loop
+    RET
+%endmacro
+
+PIXELSUB_PS_W64_H2 64, 16
+PIXELSUB_PS_W64_H2 64, 32
+PIXELSUB_PS_W64_H2 64, 48
+PIXELSUB_PS_W64_H2 64, 64
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel
