This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit eb7f4b4e792cd8f96cd05b156618a8d88840b25b
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sun Jun 14 14:27:00 2026 +0200
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Wed Jun 17 14:33:28 2026 +0200

    avcodec/x86/h264_intrapred: Add AVX2 horizontal pred versions
    
    pred8x8_horizontal_8_c:                   6.9
    pred8x8_horizontal_8_sse2:                9.9 ( 0.70x)
    pred8x8_horizontal_8_ssse3:               9.5 ( 0.73x)
    pred8x8_horizontal_8_avx2:                5.1 ( 1.35x)
    
    pred16x16_horizontal_8_c:                10.9
    pred16x16_horizontal_8_sse2:             15.0 ( 0.72x)
    pred16x16_horizontal_8_ssse3:            11.7 ( 0.93x)
    pred16x16_horizontal_8_avx2:              9.6 ( 1.13x)
    
    The new functions are cheap and only occupy 2*48B.
    
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_intrapred.asm    | 18 ++++++++++++++++--
 libavcodec/x86/h264_intrapred_init.c |  5 +++++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index ad4e267f03..d3bf6a627f 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -71,10 +71,14 @@ cglobal pred16x16_vertical_8, 2,3
 %macro PRED16x16_H 0
 cglobal pred16x16_horizontal_8, 2,3
     mov       r2, 8
-%if cpuflag(ssse3)
+%if cpuflag(ssse3) && notcpuflag(avx2)
     mova      m2, [pb_3]
 %endif
 .loop:
+%if cpuflag(avx2)
+    vpbroadcastb m0, [r0+r1*0-1]
+    vpbroadcastb m1, [r0+r1*1-1]
+%else
     movd      m0, [r0+r1*0-4]
     movd      m1, [r0+r1*1-4]
 
@@ -86,6 +90,7 @@ cglobal pred16x16_horizontal_8, 2,3
     punpcklbw m1, m1
     SPLATW    m0, m0, 3
     SPLATW    m1, m1, 3
+%endif
 %endif
 
     mova [r0+r1*0], m0
@@ -100,6 +105,8 @@ INIT_XMM sse2
 PRED16x16_H
 INIT_XMM ssse3
 PRED16x16_H
+INIT_XMM avx2
+PRED16x16_H
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_dc_8(uint8_t *src, ptrdiff_t stride)
@@ -586,12 +593,17 @@ cglobal pred8x8_vertical_8, 2,2
 %macro PRED8x8_H 0
 cglobal pred8x8_horizontal_8, 2,3,3
     mov       r2, 4
-%if cpuflag(ssse3)
+%if cpuflag(ssse3) && notcpuflag(avx2)
     mova      m2, [pb_3]
 %endif
 .loop:
+%if cpuflag(avx2)
+    vpbroadcastb m0, [r0+r1*0-1]
+    vpbroadcastb m1, [r0+r1*1-1]
+%else
     SPLATB_LOAD m0, r0+r1*0-1, m2
     SPLATB_LOAD m1, r0+r1*1-1, m2
+%endif
     movq [r0+r1*0], m0
     movq [r0+r1*1], m1
     lea       r0, [r0+r1*2]
@@ -604,6 +616,8 @@ INIT_XMM sse2
 PRED8x8_H
 INIT_XMM ssse3
 PRED8x8_H
+INIT_XMM avx2
+PRED8x8_H
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred8x8_top_dc_8_sse2(uint8_t *src, ptrdiff_t stride)
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 5b308f658f..b5d82694a2 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -102,6 +102,7 @@ PRED16x16(horizontal, 10, sse2)
 PRED16x16(vertical, 8, sse)
 PRED16x16(horizontal, 8, sse2)
 PRED16x16(horizontal, 8, ssse3)
+PRED16x16(horizontal, 8, avx2)
 PRED16x16(dc, 8, sse2)
 PRED16x16(dc, 8, ssse3)
 PRED16x16(plane_h264, 8, sse2)
@@ -119,6 +120,7 @@ PRED8x8(dc, 8, sse2)
 PRED8x8(vertical, 8, sse2)
 PRED8x8(horizontal, 8, sse2)
 PRED8x8(horizontal, 8, ssse3)
+PRED8x8(horizontal, 8, avx2)
 PRED8x8(plane, 8, sse2)
 PRED8x8(plane, 8, ssse3)
 PRED8x8(tm_vp8, 8, sse2)
@@ -256,6 +258,9 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
         }
 
         if(EXTERNAL_AVX2(cpu_flags)){
+            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_avx2;
+            if (chroma_format_idc <= 1)
+                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_avx2;
             if (codec_id == AV_CODEC_ID_VP8) {
                 h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_avx2;
             }

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to