Re: [libav-devel] [PATCH] dsputil: x86: Convert some inline asm to yasm
On Tue, Jan 22, 2013 at 04:40:34PM -0500, Daniel Kang wrote: --- a/libavcodec/x86/dsputil_avg_template.c +++ b/libavcodec/x86/dsputil_avg_template.c @@ -24,781 +24,32 @@ //FIXME the following could be optimized too ... +static void DEF(ff_put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_put_no_rnd_pixels8_x2)(block , pixels , line_size, h); +DEF(ff_put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h); } +static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_put_pixels8_y2)(block , pixels , line_size, h); +DEF(ff_put_pixels8_y2)(block+8, pixels+8, line_size, h); } +static void DEF(ff_put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_put_no_rnd_pixels8_y2)(block , pixels , line_size, h); +DEF(ff_put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h); } +static void DEF(ff_avg_pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_avg_pixels8)(block , pixels , line_size, h); +DEF(ff_avg_pixels8)(block+8, pixels+8, line_size, h); } +static void DEF(ff_avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_avg_pixels8_x2)(block , pixels , line_size, h); +DEF(ff_avg_pixels8_x2)(block+8, pixels+8, line_size, h); } +static void DEF(ff_avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_avg_pixels8_y2)(block , pixels , line_size, h); +DEF(ff_avg_pixels8_y2)(block+8, pixels+8, line_size, h); } +static void DEF(ff_avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_avg_pixels8_xy2)(block , pixels , line_size, h); +DEF(ff_avg_pixels8_xy2)(block+8, pixels+8, line_size, h); } Moving this to a macro and deleting the file seems saner to me. Maybe there are other opinions though... 
--- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -83,6 +83,147 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE) = { 0xFEFEFEFEFEFEFEFEULL, 0xFEF +#if HAVE_YASM +/* VC-1-specific */ +#define ff_put_pixels8_mmx ff_put_pixels8_mmxext +void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, + int stride, int rnd) +{ +ff_put_pixels8_mmx(dst, src, stride, 8); +} + +void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, + int stride, int rnd) +{ +ff_avg_pixels8_mmxext(dst, src, stride, 8); +} Is this used outside of VC-1? If no, this should be split out and moved to a VC-1-specific file. +/***/ +/* 3Dnow specific */ + +#define DEF(x) x ## _3dnow + +#include "dsputil_avg_template.c" + +#undef DEF + +/***/ +/* MMXEXT specific */ + +#define DEF(x) x ## _mmxext + +#include "dsputil_avg_template.c" + +#undef DEF + + + +#endif /* HAVE_YASM */ + + + + #if HAVE_INLINE_ASM nit: stray large amount of empty lines --- a/libavcodec/x86/dsputil.asm +++ b/libavcodec/x86/dsputil.asm @@ -879,3 +884,986 @@ cglobal avg_pixels16, 4,5,4 lea r0, [r0+r2*4] jnz .loop REP_RET + + + + +; HPEL mmxext +%macro PAVGB_OP 2 nit: 4 empty lines looks slightly weird; in that file 2 empty lines between unrelated blocks seem to be the norm. Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] dsputil: x86: Convert some inline asm to yasm
On 22/01/13 22:40, Daniel Kang wrote: Specifically dsputil_avg_template.c and mpeg4 qpel dsputil: x86: Convert mpeg4 qpel and dsputil avg to yasm Maybe? --- Remove some cosmetic changes --- libavcodec/x86/dsputil.asm | 988 + libavcodec/x86/dsputil_avg_template.c | 791 +- libavcodec/x86/dsputil_mmx.c | 927 --- libavcodec/x86/h264_qpel.c | 22 - libavcodec/x86/vc1dsp_mmx.c | 4 + 5 files changed, 1357 insertions(+), 1375 deletions(-) Looks OK to me if it looks OK to Loren. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] dsputil: x86: Convert some inline asm to yasm
On Tue, Jan 22, 2013 at 5:10 PM, Diego Biurrun di...@biurrun.de wrote: On Tue, Jan 22, 2013 at 04:40:34PM -0500, Daniel Kang wrote: --- a/libavcodec/x86/dsputil_avg_template.c +++ b/libavcodec/x86/dsputil_avg_template.c @@ -24,781 +24,32 @@ //FIXME the following could be optimized too ... +static void DEF(ff_put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_put_no_rnd_pixels8_x2)(block , pixels , line_size, h); +DEF(ff_put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h); } +static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_put_pixels8_y2)(block , pixels , line_size, h); +DEF(ff_put_pixels8_y2)(block+8, pixels+8, line_size, h); } +static void DEF(ff_put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_put_no_rnd_pixels8_y2)(block , pixels , line_size, h); +DEF(ff_put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h); } +static void DEF(ff_avg_pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_avg_pixels8)(block , pixels , line_size, h); +DEF(ff_avg_pixels8)(block+8, pixels+8, line_size, h); } +static void DEF(ff_avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_avg_pixels8_x2)(block , pixels , line_size, h); +DEF(ff_avg_pixels8_x2)(block+8, pixels+8, line_size, h); } +static void DEF(ff_avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_avg_pixels8_y2)(block , pixels , line_size, h); +DEF(ff_avg_pixels8_y2)(block+8, pixels+8, line_size, h); } +static void DEF(ff_avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ +DEF(ff_avg_pixels8_xy2)(block , pixels , line_size, h); +DEF(ff_avg_pixels8_xy2)(block+8, pixels+8, line_size, h); } Moving this to a macro and deleting the file seems saner to me. Maybe there are other opinions though... I was trying to avoid more macro hell in dsputil. 
Suggestions appreciated. --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -83,6 +83,147 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE) = { 0xFEFEFEFEFEFEFEFEULL, 0xFEF +#if HAVE_YASM +/* VC-1-specific */ +#define ff_put_pixels8_mmx ff_put_pixels8_mmxext +void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, + int stride, int rnd) +{ +ff_put_pixels8_mmx(dst, src, stride, 8); +} + +void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, + int stride, int rnd) +{ +ff_avg_pixels8_mmxext(dst, src, stride, 8); +} Is this used outside of VC-1? If no, this should be split out and moved to a VC-1-specific file. The avg and put pixels functions are. I am fairly confident the others aren't. +/***/ +/* 3Dnow specific */ + +#define DEF(x) x ## _3dnow + +#include "dsputil_avg_template.c" + +#undef DEF + +/***/ +/* MMXEXT specific */ + +#define DEF(x) x ## _mmxext + +#include "dsputil_avg_template.c" + +#undef DEF + + + +#endif /* HAVE_YASM */ + + + + #if HAVE_INLINE_ASM nit: stray large amount of empty lines Fixed. --- a/libavcodec/x86/dsputil.asm +++ b/libavcodec/x86/dsputil.asm @@ -879,3 +884,986 @@ cglobal avg_pixels16, 4,5,4 lea r0, [r0+r2*4] jnz .loop REP_RET + + + + +; HPEL mmxext +%macro PAVGB_OP 2 nit: 4 empty lines looks slightly weird; in that file 2 empty lines between unrelated blocks seem to be the norm. Fixed. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel