Re: [FFmpeg-devel] [PATCH 1/2] pixblockdsp: x86: Condense diff_pixels_* to a shared macro
Hi, On Sun, Nov 1, 2015 at 11:59 AM, Timothy Gu wrote: > --- > libavcodec/x86/pixblockdsp.asm | 66 > -- > 1 file changed, 31 insertions(+), 35 deletions(-) > > diff --git a/libavcodec/x86/pixblockdsp.asm > b/libavcodec/x86/pixblockdsp.asm > index 7c5377b..a7d9816 100644 > --- a/libavcodec/x86/pixblockdsp.asm > +++ b/libavcodec/x86/pixblockdsp.asm > @@ -80,54 +80,50 @@ cglobal get_pixels, 3, 4, 5 > mova [r0+0x70], m3 > RET > > -INIT_MMX mmx > ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const > uint8_t *s2, > ; int stride); > -cglobal diff_pixels, 4,5 > -movsxdifnidn r3, r3d > -pxor m7, m7 > -add r0, 128 > -mov r4, -128 > -.loop: > -mova m0, [r1] > -mova m2, [r2] > -mova m1, m0 > -mova m3, m2 > -punpcklbw m0, m7 > -punpckhbw m1, m7 > -punpcklbw m2, m7 > -punpckhbw m3, m7 > -psubw m0, m2 > -psubw m1, m3 > -mova [r0+r4+0], m0 > -mova [r0+r4+8], m1 > -add r1, r3 > -add r2, r3 > -add r4, 16 > -jne .loop > -REP_RET > - > -INIT_XMM sse2 > -cglobal diff_pixels, 4, 5, 5 > +%macro DIFF_PIXELS 0 > +cglobal diff_pixels, 4,5,5 > movsxdifnidn r3, r3d > pxor m4, m4 > add r0, 128 > mov r4, -128 > .loop: > -movh m0, [r1] > -movh m2, [r2] > -movh m1, [r1+r3] > -movh m3, [r2+r3] > +movq m0, [r1] > +movq m2, [r2] > +%if mmsize == 8 > +movq m1, m0 > +movq m3, m2 > +punpcklbw m0, m4 > +punpckhbw m1, m4 > +punpcklbw m2, m4 > +punpckhbw m3, m4 > +%else > +movq m1, [r1+r3] > +movq m3, [r2+r3] > punpcklbw m0, m4 > punpcklbw m1, m4 > punpcklbw m2, m4 > punpcklbw m3, m4 > +%endif > psubw m0, m2 > psubw m1, m3 > -mova [r0+r4+0 ], m0 > -mova [r0+r4+16], m1 > +mova [r0+r4+0], m0 > +mova [r0+r4+mmsize], m1 > +%if mmsize == 8 > +add r1, r3 > +add r2, r3 > +%else > lea r1, [r1+r3*2] > lea r2, [r2+r3*2] > -add r4, 32 > +%endif > +add r4, 2 * mmsize > jne .loop > -RET > +REP_RET > RET. We don't use REP_RET anymore. Rest is fine. Ronald ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 1/2] pixblockdsp: x86: Condense diff_pixels_* to a shared macro
On 11/1/2015 1:59 PM, Timothy Gu wrote: > --- > libavcodec/x86/pixblockdsp.asm | 66 > -- > 1 file changed, 31 insertions(+), 35 deletions(-) > LGTM ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 1/2] pixblockdsp: x86: Condense diff_pixels_* to a shared macro
On Sun, Nov 1, 2015 at 8:59 AM Timothy Gu wrote: > --- > libavcodec/x86/pixblockdsp.asm | 66 > -- > 1 file changed, 31 insertions(+), 35 deletions(-) > Ping for this patch set. Timothy ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 1/2] pixblockdsp: x86: Condense diff_pixels_* to a shared macro
--- libavcodec/x86/pixblockdsp.asm | 66 -- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm index 7c5377b..a7d9816 100644 --- a/libavcodec/x86/pixblockdsp.asm +++ b/libavcodec/x86/pixblockdsp.asm @@ -80,54 +80,50 @@ cglobal get_pixels, 3, 4, 5 mova [r0+0x70], m3 RET -INIT_MMX mmx ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, ; int stride); -cglobal diff_pixels, 4,5 -movsxdifnidn r3, r3d -pxor m7, m7 -add r0, 128 -mov r4, -128 -.loop: -mova m0, [r1] -mova m2, [r2] -mova m1, m0 -mova m3, m2 -punpcklbw m0, m7 -punpckhbw m1, m7 -punpcklbw m2, m7 -punpckhbw m3, m7 -psubw m0, m2 -psubw m1, m3 -mova [r0+r4+0], m0 -mova [r0+r4+8], m1 -add r1, r3 -add r2, r3 -add r4, 16 -jne .loop -REP_RET - -INIT_XMM sse2 -cglobal diff_pixels, 4, 5, 5 +%macro DIFF_PIXELS 0 +cglobal diff_pixels, 4,5,5 movsxdifnidn r3, r3d pxor m4, m4 add r0, 128 mov r4, -128 .loop: -movh m0, [r1] -movh m2, [r2] -movh m1, [r1+r3] -movh m3, [r2+r3] +movq m0, [r1] +movq m2, [r2] +%if mmsize == 8 +movq m1, m0 +movq m3, m2 +punpcklbw m0, m4 +punpckhbw m1, m4 +punpcklbw m2, m4 +punpckhbw m3, m4 +%else +movq m1, [r1+r3] +movq m3, [r2+r3] punpcklbw m0, m4 punpcklbw m1, m4 punpcklbw m2, m4 punpcklbw m3, m4 +%endif psubw m0, m2 psubw m1, m3 -mova [r0+r4+0 ], m0 -mova [r0+r4+16], m1 +mova [r0+r4+0], m0 +mova [r0+r4+mmsize], m1 +%if mmsize == 8 +add r1, r3 +add r2, r3 +%else lea r1, [r1+r3*2] lea r2, [r2+r3*2] -add r4, 32 +%endif +add r4, 2 * mmsize jne .loop -RET +REP_RET +%endmacro + +INIT_MMX mmx +DIFF_PIXELS + +INIT_XMM sse2 +DIFF_PIXELS -- 2.1.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel