This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit eb7f4b4e792cd8f96cd05b156618a8d88840b25b
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sun Jun 14 14:27:00 2026 +0200
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Wed Jun 17 14:33:28 2026 +0200

    avcodec/x86/h264_intrapred: Add AVX2 horizontal pred versions

    pred8x8_horizontal_8_c:      6.9
    pred8x8_horizontal_8_sse2:   9.9 ( 0.70x)
    pred8x8_horizontal_8_ssse3:  9.5 ( 0.73x)
    pred8x8_horizontal_8_avx2:   5.1 ( 1.35x)
    pred16x16_horizontal_8_c:     10.9
    pred16x16_horizontal_8_sse2:  15.0 ( 0.72x)
    pred16x16_horizontal_8_ssse3: 11.7 ( 0.93x)
    pred16x16_horizontal_8_avx2:   9.6 ( 1.13x)

    The new functions are cheap and only occupy 2*48B.

    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_intrapred.asm    | 18 ++++++++++++++++--
 libavcodec/x86/h264_intrapred_init.c |  5 +++++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index ad4e267f03..d3bf6a627f 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -71,10 +71,14 @@ cglobal pred16x16_vertical_8, 2,3
 %macro PRED16x16_H 0
 cglobal pred16x16_horizontal_8, 2,3
     mov r2, 8
-%if cpuflag(ssse3)
+%if cpuflag(ssse3) && notcpuflag(avx2)
     mova m2, [pb_3]
 %endif
 .loop:
+%if cpuflag(avx2)
+    vpbroadcastb m0, [r0+r1*0-1]
+    vpbroadcastb m1, [r0+r1*1-1]
+%else
     movd m0, [r0+r1*0-4]
     movd m1, [r0+r1*1-4]
 
@@ -86,6 +90,7 @@ cglobal pred16x16_horizontal_8, 2,3
     punpcklbw m1, m1
     SPLATW m0, m0, 3
     SPLATW m1, m1, 3
+%endif
 %endif
 
     mova [r0+r1*0], m0
@@ -100,6 +105,8 @@ INIT_XMM sse2
 PRED16x16_H
 INIT_XMM ssse3
 PRED16x16_H
+INIT_XMM avx2
+PRED16x16_H
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_dc_8(uint8_t *src, ptrdiff_t stride)
@@ -586,12 +593,17 @@ cglobal pred8x8_vertical_8, 2,2
 %macro PRED8x8_H 0
 cglobal pred8x8_horizontal_8, 2,3,3
     mov r2, 4
-%if cpuflag(ssse3)
+%if cpuflag(ssse3) && notcpuflag(avx2)
     mova m2, [pb_3]
 %endif
 .loop:
+%if cpuflag(avx2)
+    vpbroadcastb m0, [r0+r1*0-1]
+    vpbroadcastb m1, [r0+r1*1-1]
+%else
     SPLATB_LOAD m0, r0+r1*0-1, m2
     SPLATB_LOAD m1, r0+r1*1-1, m2
+%endif
     movq [r0+r1*0], m0
     movq [r0+r1*1], m1
     lea r0, [r0+r1*2]
@@ -604,6 +616,8 @@ INIT_XMM sse2
 PRED8x8_H
 INIT_XMM ssse3
 PRED8x8_H
+INIT_XMM avx2
+PRED8x8_H
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred8x8_top_dc_8_sse2(uint8_t *src, ptrdiff_t stride)
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 5b308f658f..b5d82694a2 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -102,6 +102,7 @@ PRED16x16(horizontal, 10, sse2)
 PRED16x16(vertical, 8, sse)
 PRED16x16(horizontal, 8, sse2)
 PRED16x16(horizontal, 8, ssse3)
+PRED16x16(horizontal, 8, avx2)
 PRED16x16(dc, 8, sse2)
 PRED16x16(dc, 8, ssse3)
 PRED16x16(plane_h264, 8, sse2)
@@ -119,6 +120,7 @@ PRED8x8(dc, 8, sse2)
 PRED8x8(vertical, 8, sse2)
 PRED8x8(horizontal, 8, sse2)
 PRED8x8(horizontal, 8, ssse3)
+PRED8x8(horizontal, 8, avx2)
 PRED8x8(plane, 8, sse2)
 PRED8x8(plane, 8, ssse3)
 PRED8x8(tm_vp8, 8, sse2)
@@ -256,6 +258,9 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
     }
 
     if(EXTERNAL_AVX2(cpu_flags)){
+        h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_avx2;
+        if (chroma_format_idc <= 1)
+            h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_avx2;
         if (codec_id == AV_CODEC_ID_VP8) {
             h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_avx2;
         }
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]
