Kaby Lake Pentium:
 - ff_h264_idct_add_8_sse2: ~1.18x faster than mmxext
 - ff_h264_idct_dc_add_8_sse2: ~1.07x faster than mmxext
---
 libavcodec/x86/h264_idct.asm  | 11 +++++++++--
 libavcodec/x86/h264dsp_init.c |  5 +++++
 2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm index 43f7791..5d83d91 100644 --- a/libavcodec/x86/h264_idct.asm +++ b/libavcodec/x86/h264_idct.asm @@ -1140,8 +1140,6 @@ IDCT_DC_DEQUANT 0 INIT_MMX sse2 IDCT_DC_DEQUANT 7 -INIT_XMM avx - ; %unmacro STORE_DIFFx2 8 ; remove macro from x86util.asm but yasm doesn't have this yet %macro STORE_DIFFx2 8 ; add1, add2, reg1, reg2, zero, shift, source, stride movd %3, [%7] @@ -1170,6 +1168,10 @@ INIT_XMM avx packuswb m1, m1 %endmacro +%macro IDCT_XMM 1 + +INIT_XMM %1 + cglobal h264_idct_add_8, 3, 3, 8, dst_, block_, stride_ movsxdifnidn stride_q, stride_d IDCT4_ADD dst_q, block_q, stride_q @@ -1182,3 +1184,8 @@ cglobal h264_idct_dc_add_8, 3, 4, 6, dst_, block_, stride_ DC_ADD_INIT r3 DC_ADD_MMXEXT_OP movd, dst_q, stride_q, r3 RET + +%endmacro + +IDCT_XMM sse2 +IDCT_XMM avx diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c index bf74937..ce7179f 100644 --- a/libavcodec/x86/h264dsp_init.c +++ b/libavcodec/x86/h264dsp_init.c @@ -32,9 +32,11 @@ void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT(uint8_t *dst, \ int stride); IDCT_ADD_FUNC(, 8, mmx) +IDCT_ADD_FUNC(, 8, sse2) IDCT_ADD_FUNC(, 8, avx) IDCT_ADD_FUNC(, 10, sse2) IDCT_ADD_FUNC(_dc, 8, mmxext) +IDCT_ADD_FUNC(_dc, 8, sse2) IDCT_ADD_FUNC(_dc, 8, avx) IDCT_ADD_FUNC(_dc, 10, mmxext) IDCT_ADD_FUNC(8_dc, 8, mmxext) @@ -316,6 +318,9 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_sse2; c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_sse2; } + + c->h264_idct_add = ff_h264_idct_add_8_sse2; + c->h264_idct_dc_add = ff_h264_idct_dc_add_8_sse2; } if (EXTERNAL_SSSE3(cpu_flags)) { c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3; -- 2.8.3 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel