This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

The following commit(s) were added to refs/heads/master by this push:
     new dc23adde9b avcodec/x86/h264_intrapred: Replace pred8x8_dc_8_mmxext with SSE2
dc23adde9b is described below

commit dc23adde9bc632aff88eb1306b44a17ab232a11f
Author:     Zuxy Meng <[email protected]>
AuthorDate: Sat Apr 11 21:09:13 2026 -0700
Commit:     Zuxy Meng <[email protected]>
CommitDate: Mon Apr 20 19:38:56 2026 -0700

    avcodec/x86/h264_intrapred: Replace pred8x8_dc_8_mmxext with SSE2
    
    Deprecating MMX w/o performance regression; nearly identical performance
    numbers on my Zen 4 (1.99x vs c)
    
    Signed-off-by: Zuxy Meng <[email protected]>
---
 libavcodec/x86/h264_intrapred.asm    | 30 ++++++++++++++----------------
 libavcodec/x86/h264_intrapred_init.c |  8 ++------
 2 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 2e4a46ae76..b1c5ffa50d 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -634,18 +634,18 @@ cglobal pred8x8_top_dc_8, 2,5
     RET
 
 ;-----------------------------------------------------------------------------
-; void ff_pred8x8_dc_8_mmxext(uint8_t *src, ptrdiff_t stride)
+; void ff_pred8x8_dc_8_sse2(uint8_t *src, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
 
-INIT_MMX mmxext
-cglobal pred8x8_dc_8, 2,5
+INIT_XMM sse2
+cglobal pred8x8_dc_8, 2,5,5
     sub       r0, r1
-    pxor      m7, m7
+    pxor      m4, m4
     movd      m0, [r0+0]
     movd      m1, [r0+4]
-    psadbw    m0, m7            ; s0
+    psadbw    m0, m4            ; s0
     mov       r4, r0
-    psadbw    m1, m7            ; s1
+    psadbw    m1, m4            ; s1
 
     movzx    r2d, byte [r0+r1*1-1]
     movzx    r3d, byte [r0+r1*2-1]
@@ -671,27 +671,25 @@ cglobal pred8x8_dc_8, 2,5
     mov       r0, r4
     punpcklwd m2, m3
     punpckldq m0, m2            ; s0, s1, s2, s3
-    pshufw    m3, m0, 11110110b ; s2, s1, s3, s3
+    pshuflw   m3, m0, 11110110b ; s2, s1, s3, s3
     lea       r2, [r0+r1*2]
-    pshufw    m0, m0, 01110100b ; s0, s1, s3, s1
+    pshuflw   m0, m0, 01110100b ; s0, s1, s3, s1
     paddw     m0, m3
     lea       r3, [r2+r1*2]
     psrlw     m0, 2
-    pavgw     m0, m7            ; s0+s2, s1, s3, s1+s3
+    pavgw     m0, m4            ; s0+s2, s1, s3, s1+s3
     lea       r4, [r3+r1*2]
     packuswb  m0, m0
     punpcklbw m0, m0
-    movq      m1, m0
-    punpcklbw m0, m0
-    punpckhbw m1, m1
+    punpcklwd m0, m0
     movq [r0+r1*1], m0
     movq [r0+r1*2], m0
     movq [r2+r1*1], m0
     movq [r2+r1*2], m0
-    movq [r3+r1*1], m1
-    movq [r3+r1*2], m1
-    movq [r4+r1*1], m1
-    movq [r4+r1*2], m1
+    movhps [r3+r1*1], m0
+    movhps [r3+r1*2], m0
+    movhps [r4+r1*1], m0
+    movhps [r4+r1*2], m0
     RET
 
 ;-----------------------------------------------------------------------------
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 0476980a57..44dd0cbde4 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -115,7 +115,7 @@ PRED16x16(tm_vp8, 8, avx2)
 
 PRED8x8(top_dc, 8, sse2)
 PRED8x8(dc_rv40, 8, mmxext)
-PRED8x8(dc, 8, mmxext)
+PRED8x8(dc, 8, sse2)
 PRED8x8(vertical, 8, sse2)
 PRED8x8(horizontal, 8, mmxext)
 PRED8x8(horizontal, 8, ssse3)
@@ -185,11 +185,6 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
             if (codec_id != AV_CODEC_ID_RV40) {
                 h->pred4x4  [HOR_UP_PRED        ] = ff_pred4x4_horizontal_up_8_mmxext;
             }
-            if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
-                if (chroma_format_idc <= 1) {
-                    h->pred8x8[DC_PRED8x8       ] = ff_pred8x8_dc_8_mmxext;
-                }
-            }
             if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
                 h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_rv40_8_mmxext;
                 h->pred4x4  [TM_VP8_PRED        ] = ff_pred4x4_tm_vp8_8_mmxext;
@@ -213,6 +208,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
                 h->pred8x8  [VERT_PRED8x8     ] = ff_pred8x8_vertical_8_sse2;
                 if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
                     h->pred8x8 [TOP_DC_PRED8x8] = ff_pred8x8_top_dc_8_sse2;
+                    h->pred8x8 [DC_PRED8x8    ] = ff_pred8x8_dc_8_sse2;
                 }
             }
             if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to