Patch OK. Mickael
Le mercredi 20 août 2014, James Almer <jamr...@gmail.com> a écrit : > ~15% faster than sse2 > > Signed-off-by: James Almer <jamr...@gmail.com <javascript:;>> > --- > libavcodec/x86/hevc_res_add.asm | 15 +++++++++++---- > libavcodec/x86/hevcdsp.h | 4 ++++ > libavcodec/x86/hevcdsp_init.c | 4 ++++ > 3 files changed, 19 insertions(+), 4 deletions(-) > > diff --git a/libavcodec/x86/hevc_res_add.asm > b/libavcodec/x86/hevc_res_add.asm > index 47022d3..feea50c 100644 > --- a/libavcodec/x86/hevc_res_add.asm > +++ b/libavcodec/x86/hevc_res_add.asm > @@ -156,8 +156,8 @@ cglobal hevc_transform_add4_8, 3, 4, 6 > %endmacro > > > -INIT_XMM sse2 > -; void ff_hevc_transform_add8_8_sse2(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride) > +%macro TRANSFORM_ADD_8 0 > +; void ff_hevc_transform_add8_8_<opt>(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride) > cglobal hevc_transform_add8_8, 3, 4, 8 > lea r3, [r2*3] > TR_ADD_SSE_8_8 > @@ -167,7 +167,7 @@ cglobal hevc_transform_add8_8, 3, 4, 8 > RET > > %if ARCH_X86_64 > -; void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride) > +; void ff_hevc_transform_add16_8_<opt>(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride) > cglobal hevc_transform_add16_8, 3, 4, 12 > lea r3, [r2*3] > TR_ADD_SSE_16_8 > @@ -178,7 +178,7 @@ cglobal hevc_transform_add16_8, 3, 4, 12 > %endrep > RET > > -; void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride) > +; void ff_hevc_transform_add32_8_<opt>(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride) > cglobal hevc_transform_add32_8, 3, 4, 12 > > TR_ADD_SSE_32_8 > @@ -190,6 +190,13 @@ cglobal hevc_transform_add32_8, 3, 4, 12 > RET > > %endif ;ARCH_X86_64 > +%endmacro > + > +INIT_XMM sse2 > +TRANSFORM_ADD_8 > +INIT_XMM avx > +TRANSFORM_ADD_8 > + > > > ;----------------------------------------------------------------------------- > ; void ff_hevc_transform_add_10(pixel *dst, int16_t *block, int stride) > > > 
;----------------------------------------------------------------------------- > diff --git a/libavcodec/x86/hevcdsp.h b/libavcodec/x86/hevcdsp.h > index 7ced22c..74b5173 100644 > --- a/libavcodec/x86/hevcdsp.h > +++ b/libavcodec/x86/hevcdsp.h > @@ -139,6 +139,10 @@ void ff_hevc_transform_add8_8_sse2(uint8_t *dst, > int16_t *coeffs, ptrdiff_t stri > void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride); > void ff_hevc_transform_add32_8_sse2(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride); > > +void ff_hevc_transform_add8_8_avx(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride); > +void ff_hevc_transform_add16_8_avx(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride); > +void ff_hevc_transform_add32_8_avx(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride); > + > void ff_hevc_transform_add4_10_mmxext(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride); > void ff_hevc_transform_add8_10_sse2(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride); > void ff_hevc_transform_add16_10_sse2(uint8_t *dst, int16_t *coeffs, > ptrdiff_t stride); > diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c > index 0f9fe7d..f6f0a4b 100644 > --- a/libavcodec/x86/hevcdsp_init.c > +++ b/libavcodec/x86/hevcdsp_init.c > @@ -509,7 +509,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const > int bit_depth) > if (ARCH_X86_64) { > c->hevc_v_loop_filter_luma = > ff_hevc_v_loop_filter_luma_8_avx; > c->hevc_h_loop_filter_luma = > ff_hevc_h_loop_filter_luma_8_avx; > + > + c->transform_add[2] = ff_hevc_transform_add16_8_avx; > + c->transform_add[3] = ff_hevc_transform_add32_8_avx; > } > + c->transform_add[1] = ff_hevc_transform_add8_8_avx; > } > if (EXTERNAL_AVX2(cpu_flags)) { > c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2; > -- > 1.8.5.5 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org <javascript:;> > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > 
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel