This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

The following commit(s) were added to refs/heads/master by this push:
     new 34dfa8bf2b avcodec/x86/h264_intrapred: SSE2 impl. of pred8x8l_top_dc_8
34dfa8bf2b is described below

commit 34dfa8bf2b86041c2b836629ec815f630eb38f33
Author:     Zuxy Meng <[email protected]>
AuthorDate: Tue Apr 21 20:43:03 2026 -0700
Commit:     kierank <[email protected]>
CommitDate: Mon May 25 07:05:09 2026 +0000

    avcodec/x86/h264_intrapred: SSE2 impl. of pred8x8l_top_dc_8
    
    Replace the MMXEXT implementation with an SSE2 one that needs fewer
    instructions. Remove the SSSE3 implementation too, since the code no
    longer uses palignr.
    
    pred8x8l_top_dc_8_mmxext:                                8.7 ( 2.29x)
    pred8x8l_top_dc_8_ssse3:                                 7.9 ( 2.51x)
    pred8x8l_top_dc_8_sse2:                                  6.7 ( 3.01x)
    
    Signed-off-by: Zuxy Meng <[email protected]>
---
 libavcodec/x86/h264_intrapred.asm    | 74 +++++++++++++++---------------------
 libavcodec/x86/h264_intrapred_init.c |  6 +--
 2 files changed, 32 insertions(+), 48 deletions(-)

diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 6cf01a228a..1cc0dd7f8f 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -805,57 +805,43 @@ cglobal pred8x8_tm_vp8_8, 2,3,6
 ; void ff_pred8x8l_top_dc_8(uint8_t *src, int has_topleft, int has_topright,
 ;                           ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-%macro PRED8x8L_TOP_DC 0
-cglobal pred8x8l_top_dc_8, 4,4
+INIT_XMM sse2
+cglobal pred8x8l_top_dc_8, 4,4,6
     sub          r0, r3
-    pxor        mm7, mm7
-    movq        mm0, [r0-8]
-    movq        mm3, [r0]
-    movq        mm1, [r0+8]
-    movq        mm2, mm3
-    movq        mm4, mm3
-    PALIGNR     mm2, mm0, 7, mm0
-    PALIGNR     mm1, mm4, 1, mm4
+    movu         m2, [r0-8]
+    movu         m3, [r0]
+    mova         m1, m3
+    psrldq       m2, 7
+    psrldq       m1, 1
     test        r1d, r1d ; top_left
-    jz .fix_lt_2
+    jnz .has_topleft
+    pxor         m5, m3, m2
+    psllq        m5, 56
+    psrlq        m5, 56
+    pxor         m2, m5
+.has_topleft:
     test        r2d, r2d ; top_right
-    jz .fix_tr_1
-    jmp .body
-.fix_lt_2:
-    movq        mm5, mm3
-    pxor        mm5, mm2
-    psllq       mm5, 56
-    psrlq       mm5, 56
-    pxor        mm2, mm5
-    test        r2d, r2d ; top_right
-    jnz .body
-.fix_tr_1:
-    movq        mm5, mm3
-    pxor        mm5, mm1
-    psrlq       mm5, 56
-    psllq       mm5, 56
-    pxor        mm1, mm5
-.body:
-    PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5
-    psadbw   mm7, mm0
-    paddw    mm7, [pw_4]
-    psrlw    mm7, 3
-    pshufw   mm7, mm7, 0
-    packuswb mm7, mm7
+    jnz .has_topright
+    pxor         m5, m3, m1
+    psrlq        m5, 56
+    psllq        m5, 56
+    pxor         m1, m5
+.has_topright:
+    pxor     m4, m4
+    PRED4x4_LOWPASS m0, m2, m1, m3, m5
+    psadbw   m4, m0
+    paddw    m4, [pw_4]
+    psrlw    m4, 3
+    SPLATW   m4, m4, 0
+    packuswb m4, m4
 %rep 3
-    movq [r0+r3*1], mm7
-    movq [r0+r3*2], mm7
+    movq [r0+r3*1], m4
+    movq [r0+r3*2], m4
     lea    r0, [r0+r3*2]
 %endrep
-    movq [r0+r3*1], mm7
-    movq [r0+r3*2], mm7
+    movq [r0+r3*1], m4
+    movq [r0+r3*2], m4
     RET
-%endmacro
-
-INIT_MMX mmxext
-PRED8x8L_TOP_DC
-INIT_MMX ssse3
-PRED8x8L_TOP_DC
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred8x8l_dc_8(uint8_t *src, int has_topleft, int has_topright,
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 697ce1af08..5b308f658f 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -124,8 +124,7 @@ PRED8x8(plane, 8, ssse3)
 PRED8x8(tm_vp8, 8, sse2)
 PRED8x8(tm_vp8, 8, ssse3)
 
-PRED8x8L(top_dc, 8, mmxext)
-PRED8x8L(top_dc, 8, ssse3)
+PRED8x8L(top_dc, 8, sse2)
 PRED8x8L(dc, 8, mmxext)
 PRED8x8L(dc, 8, ssse3)
 PRED8x8L(horizontal, 8, mmxext)
@@ -164,7 +163,6 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
 
     if (bit_depth == 8) {
         if (EXTERNAL_MMXEXT(cpu_flags)) {
-            h->pred8x8l [TOP_DC_PRED            ] = ff_pred8x8l_top_dc_8_mmxext;
             h->pred8x8l [DC_PRED                ] = ff_pred8x8l_dc_8_mmxext;
             h->pred8x8l [HOR_PRED               ] = ff_pred8x8l_horizontal_8_mmxext;
             h->pred8x8l [VERT_PRED              ] = ff_pred8x8l_vertical_8_mmxext;
@@ -197,6 +195,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
         if (EXTERNAL_SSE2(cpu_flags)) {
             h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_sse2;
             h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_sse2;
+            h->pred8x8l [TOP_DC_PRED          ] = ff_pred8x8l_top_dc_8_sse2;
             h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_sse2;
             h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2;
             h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_sse2;
@@ -231,7 +230,6 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
             h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_ssse3;
             if (chroma_format_idc <= 1)
                 h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_ssse3;
-            h->pred8x8l [TOP_DC_PRED          ] = ff_pred8x8l_top_dc_8_ssse3;
             h->pred8x8l [DC_PRED              ] = ff_pred8x8l_dc_8_ssse3;
             h->pred8x8l [HOR_PRED             ] = ff_pred8x8l_horizontal_8_ssse3;
             h->pred8x8l [VERT_PRED            ] = ff_pred8x8l_vertical_8_ssse3;

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to