ffmpeg | branch: master | James Almer <jamr...@gmail.com> | Sun Jul 13 03:00:50 2014 -0300| [276bef53406752b3ee9289c650bef2409cde6229] | committer: Michael Niedermayer
x86/hevc_deblock: add ff_hevc_[hv]_loop_filter_luma_{8, 10}_sse2 Signed-off-by: James Almer <jamr...@gmail.com> Reviewed-by: Kieran Kunhya <kier...@obe.tv> Signed-off-by: Michael Niedermayer <michae...@gmx.at> > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=276bef53406752b3ee9289c650bef2409cde6229 --- libavcodec/x86/hevc_deblock.asm | 8 +++++++- libavcodec/x86/hevcdsp_init.c | 31 ++++++++++++++++++++----------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm index d23cac7..c035668 100644 --- a/libavcodec/x86/hevc_deblock.asm +++ b/libavcodec/x86/hevc_deblock.asm @@ -728,7 +728,7 @@ cglobal hevc_h_loop_filter_chroma_10, 3, 4, 7, pix, stride, tc, pix0 RET %if ARCH_X86_64 -INIT_XMM ssse3 +%macro LOOP_FILTER_LUMA 0 ;----------------------------------------------------------------------------- ; void ff_hevc_v_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int *_beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); ;----------------------------------------------------------------------------- @@ -828,4 +828,10 @@ cglobal hevc_h_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc, count, pix movdqu [pixq+2*strideq], m6; q2 .bypassluma: RET +%endmacro + +INIT_XMM sse2 +LOOP_FILTER_LUMA +INIT_XMM ssse3 +LOOP_FILTER_LUMA %endif diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index cad236d..2c76766 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -36,18 +36,20 @@ void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t stride, int *_beta, int *_tc, \ uint8_t *_no_p, uint8_t *_no_q); -#define LFC_FUNCS(type, depth) \ -LFC_FUNC(h, depth, sse2) \ -LFC_FUNC(v, depth, sse2) +#define LFC_FUNCS(type, depth, opt) \ +LFC_FUNC(h, depth, opt) \ +LFC_FUNC(v, depth, opt) -#define LFL_FUNCS(type, depth) \ -LFL_FUNC(h, depth, ssse3) \ -LFL_FUNC(v, depth, ssse3) +#define LFL_FUNCS(type, depth, opt) \ +LFL_FUNC(h, depth, opt) \ +LFL_FUNC(v, depth, opt) -LFC_FUNCS(uint8_t, 8) -LFC_FUNCS(uint8_t, 10) -LFL_FUNCS(uint8_t, 8) -LFL_FUNCS(uint8_t, 10) +LFC_FUNCS(uint8_t, 8, sse2) +LFC_FUNCS(uint8_t, 10, sse2) +LFL_FUNCS(uint8_t, 8, sse2) +LFL_FUNCS(uint8_t, 10, sse2) +LFL_FUNCS(uint8_t, 8, ssse3) +LFL_FUNCS(uint8_t, 10, ssse3) #if HAVE_SSE2_EXTERNAL void ff_hevc_idct32_dc_add_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) @@ -429,6 +431,10 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth) if (EXTERNAL_SSE2(mm_flags)) { c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2; + if (ARCH_X86_64) { + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2; + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2; + } c->transform_dc_add[2] = ff_hevc_idct16_dc_add_8_sse2; c->transform_dc_add[3] = ff_hevc_idct32_dc_add_8_sse2; @@ -460,7 +466,10 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth) if (EXTERNAL_SSE2(mm_flags)) { c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2; - + if (ARCH_X86_64) { + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2; + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2; + } c->transform_dc_add[1] = ff_hevc_idct8_dc_add_10_sse2; c->transform_dc_add[2] = ff_hevc_idct16_dc_add_10_sse2; _______________________________________________ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog