Re: [FFmpeg-devel] [PATCH 1/2] pixblockdsp: x86: Condense diff_pixels_* to a shared macro

2015-11-06 Thread Timothy Gu
On Sun, Nov 1, 2015 at 8:59 AM Timothy Gu  wrote:

> ---
>  libavcodec/x86/pixblockdsp.asm | 66
> --
>  1 file changed, 31 insertions(+), 35 deletions(-)
>

Ping for this patch set.

Timothy
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/2] pixblockdsp: x86: Condense diff_pixels_* to a shared macro

2015-11-06 Thread James Almer
On 11/1/2015 1:59 PM, Timothy Gu wrote:
> ---
>  libavcodec/x86/pixblockdsp.asm | 66 
> --
>  1 file changed, 31 insertions(+), 35 deletions(-)
> 

LGTM

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/2] pixblockdsp: x86: Condense diff_pixels_* to a shared macro

2015-11-06 Thread Ronald S. Bultje
Hi,

On Sun, Nov 1, 2015 at 11:59 AM, Timothy Gu  wrote:

> ---
>  libavcodec/x86/pixblockdsp.asm | 66
> --
>  1 file changed, 31 insertions(+), 35 deletions(-)
>
> diff --git a/libavcodec/x86/pixblockdsp.asm
> b/libavcodec/x86/pixblockdsp.asm
> index 7c5377b..a7d9816 100644
> --- a/libavcodec/x86/pixblockdsp.asm
> +++ b/libavcodec/x86/pixblockdsp.asm
> @@ -80,54 +80,50 @@ cglobal get_pixels, 3, 4, 5
>  mova  [r0+0x70], m3
>  RET
>
> -INIT_MMX mmx
>  ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const
> uint8_t *s2,
>  ; int stride);
> -cglobal diff_pixels, 4,5
> -movsxdifnidn r3, r3d
> -pxor m7, m7
> -add  r0,  128
> -mov  r4, -128
> -.loop:
> -mova m0, [r1]
> -mova m2, [r2]
> -mova m1, m0
> -mova m3, m2
> -punpcklbwm0, m7
> -punpckhbwm1, m7
> -punpcklbwm2, m7
> -punpckhbwm3, m7
> -psubwm0, m2
> -psubwm1, m3
> -mova  [r0+r4+0], m0
> -mova  [r0+r4+8], m1
> -add  r1, r3
> -add  r2, r3
> -add  r4, 16
> -jne .loop
> -REP_RET
> -
> -INIT_XMM sse2
> -cglobal diff_pixels, 4, 5, 5
> +%macro DIFF_PIXELS 0
> +cglobal diff_pixels, 4,5,5
>  movsxdifnidn r3, r3d
>  pxor m4, m4
>  add  r0,  128
>  mov  r4, -128
>  .loop:
> -movh m0, [r1]
> -movh m2, [r2]
> -movh m1, [r1+r3]
> -movh m3, [r2+r3]
> +movq m0, [r1]
> +movq m2, [r2]
> +%if mmsize == 8
> +movq m1, m0
> +movq m3, m2
> +punpcklbwm0, m4
> +punpckhbwm1, m4
> +punpcklbwm2, m4
> +punpckhbwm3, m4
> +%else
> +movq m1, [r1+r3]
> +movq m3, [r2+r3]
>  punpcklbwm0, m4
>  punpcklbwm1, m4
>  punpcklbwm2, m4
>  punpcklbwm3, m4

> +%endif
>  psubwm0, m2
>  psubwm1, m3
> -mova [r0+r4+0 ], m0
> -mova [r0+r4+16], m1
> +mova  [r0+r4+0], m0
> +mova  [r0+r4+mmsize], m1
> +%if mmsize == 8
> +add  r1, r3
> +add  r2, r3
> +%else
>  lea  r1, [r1+r3*2]
>  lea  r2, [r2+r3*2]
> -add  r4, 32
> +%endif
> +add  r4, 2 * mmsize
>  jne .loop
> -RET
> +REP_RET
>

Use RET here. We don't use REP_RET anymore.

Rest is fine.

Ronald
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/2] pixblockdsp: x86: Condense diff_pixels_* to a shared macro

2015-11-01 Thread Timothy Gu
---
 libavcodec/x86/pixblockdsp.asm | 66 --
 1 file changed, 31 insertions(+), 35 deletions(-)

diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm
index 7c5377b..a7d9816 100644
--- a/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@ -80,54 +80,50 @@ cglobal get_pixels, 3, 4, 5
 mova  [r0+0x70], m3
 RET
 
-INIT_MMX mmx
 ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
 ; int stride);
-cglobal diff_pixels, 4,5
-movsxdifnidn r3, r3d
-pxor m7, m7
-add  r0,  128
-mov  r4, -128
-.loop:
-mova m0, [r1]
-mova m2, [r2]
-mova m1, m0
-mova m3, m2
-punpcklbwm0, m7
-punpckhbwm1, m7
-punpcklbwm2, m7
-punpckhbwm3, m7
-psubwm0, m2
-psubwm1, m3
-mova  [r0+r4+0], m0
-mova  [r0+r4+8], m1
-add  r1, r3
-add  r2, r3
-add  r4, 16
-jne .loop
-REP_RET
-
-INIT_XMM sse2
-cglobal diff_pixels, 4, 5, 5
+%macro DIFF_PIXELS 0
+cglobal diff_pixels, 4,5,5
 movsxdifnidn r3, r3d
 pxor m4, m4
 add  r0,  128
 mov  r4, -128
 .loop:
-movh m0, [r1]
-movh m2, [r2]
-movh m1, [r1+r3]
-movh m3, [r2+r3]
+movq m0, [r1]
+movq m2, [r2]
+%if mmsize == 8
+movq m1, m0
+movq m3, m2
+punpcklbwm0, m4
+punpckhbwm1, m4
+punpcklbwm2, m4
+punpckhbwm3, m4
+%else
+movq m1, [r1+r3]
+movq m3, [r2+r3]
 punpcklbwm0, m4
 punpcklbwm1, m4
 punpcklbwm2, m4
 punpcklbwm3, m4
+%endif
 psubwm0, m2
 psubwm1, m3
-mova [r0+r4+0 ], m0
-mova [r0+r4+16], m1
+mova  [r0+r4+0], m0
+mova  [r0+r4+mmsize], m1
+%if mmsize == 8
+add  r1, r3
+add  r2, r3
+%else
 lea  r1, [r1+r3*2]
 lea  r2, [r2+r3*2]
-add  r4, 32
+%endif
+add  r4, 2 * mmsize
 jne .loop
-RET
+REP_RET
+%endmacro
+
+INIT_MMX mmx
+DIFF_PIXELS
+
+INIT_XMM sse2
+DIFF_PIXELS
-- 
2.1.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel