This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 761b6f235940e73e1002d83b06e2d2b99422db1f
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Fri Apr 10 04:23:23 2026 +0200
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Mon Apr 13 08:46:44 2026 +0200

    swscale/x86/output: Remove obsolete MMXEXT function
    
    Possible now that the SSE2 function is available
    even when the stack is not aligned.
    
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libswscale/x86/output.asm | 63 +++++++++--------------------------------------
 libswscale/x86/swscale.c  |  9 -------
 2 files changed, 11 insertions(+), 61 deletions(-)

diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index bbe15510f8..e1b369c551 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -112,23 +112,10 @@ SECTION .text
 ;-----------------------------------------------------------------------------
 %macro yuv2planeX_mainloop 2
 .pixelloop_%2:
-%assign %%i 0
-    ; the rep here is for the 8-bit output MMX case, where dither covers
-    ; 8 pixels but we can only handle 2 pixels per register, and thus 4
-    ; pixels per iteration. In order to not have to keep track of where
-    ; we are w.r.t. dithering, we unroll the MMX/8-bit loop x2.
-%if %1 == 8
-%assign %%repcnt 16/mmsize
-%else
-%assign %%repcnt 1
-%endif
-
-%rep %%repcnt
-
 %if %1 == 8
 %if ARCH_X86_32
-    mova            m2, [rsp+mmsize*(0+%%i)]
-    mova            m1, [rsp+mmsize*(1+%%i)]
+    mova            m2, [rsp]
+    mova            m1, [rsp+mmsize]
 %else ; x86-64
     mova            m2,  m8
     mova            m1,  m_dith
@@ -142,7 +129,7 @@ SECTION .text
 %else
     movsx     cntr_reg,  fltsizem
 %endif
-.filterloop_%2_ %+ %%i:
+.filterloop_%2:
     ; input pixels
     mov             r6, [srcq+gprsize*cntr_reg-2*gprsize]
 %if %1 == 16
@@ -189,7 +176,7 @@ SECTION .text
 %endif ; %1 == 8/9/10/16
 
     sub       cntr_reg,  2
-    jg .filterloop_%2_ %+ %%i
+    jg .filterloop_%2
 
 %if %1 == 16
     psrad           m2,  31 - %1
@@ -210,10 +197,10 @@ SECTION .text
 %else ; %1 == 9/10
 %if cpuflag(sse4)
     packusdw        m2,  m1
-%else ; mmxext/sse2
+%else ; sse2
     packssdw        m2,  m1
     pmaxsw          m2,  m6
-%endif ; mmxext/sse2/sse4/avx
+%endif ; sse2/sse4/avx
     pminsw          m2, [yuv2yuvX_%1_upper]
 %endif ; %1 == 9/10/16
     mov%2   [dstq+r5*2],  m2
@@ -222,8 +209,6 @@ SECTION .text
     add             r5,  mmsize/2
     sub             wd,  mmsize/2
 
-%assign %%i %%i+2
-%endrep
     jg .pixelloop_%2
 %endmacro
 
@@ -267,14 +252,9 @@ cglobal yuv2planeX_%1, %3, 8, %2, -STACK_SIZE, filter, fltsize, src, dst, w, dit
     movq        m_dith, [ditherq]        ; dither
     test        offsetd, offsetd
     jz              .no_rot
-%if mmsize == 16
     punpcklqdq  m_dith,  m_dith
     psrldq      m_dith,  3
-%else
-    PALIGNR     m_dith,  m_dith,  3,  m0
-%endif ; mmsize == 16
 .no_rot:
-%if mmsize == 16
     punpcklbw   m_dith,  m6
 %if ARCH_X86_64
     punpcklwd       m8,  m_dith,  m6
@@ -289,45 +269,24 @@ cglobal yuv2planeX_%1, %3, 8, %2, -STACK_SIZE, filter, fltsize, src, dst, w, dit
     mova      [rsp+ 0],  m5
     mova      [rsp+16],  m_dith
 %endif
-%else ; mmsize == 8
-    punpcklbw       m5,  m_dith,  m6
-    punpckhbw   m_dith,  m6
-    punpcklwd       m4,  m5,  m6
-    punpckhwd       m5,  m6
-    punpcklwd       m3,  m_dith,  m6
-    punpckhwd   m_dith,  m6
-    pslld           m4,  12
-    pslld           m5,  12
-    pslld           m3,  12
-    pslld       m_dith,  12
-    mova      [rsp+ 0],  m4
-    mova      [rsp+ 8],  m5
-    mova      [rsp+16],  m3
-    mova      [rsp+24],  m_dith
-%endif ; mmsize == 8/16
 %endif ; %1 == 8
 
     xor             r5,  r5
 
-%if mmsize == 8 || %1 == 8
+%if %1 == 8
     yuv2planeX_mainloop %1, a
-%else ; mmsize == 16
+%else ; %1 != 8
     test          dstq, 15
     jnz .unaligned
     yuv2planeX_mainloop %1, a
     RET
 .unaligned:
     yuv2planeX_mainloop %1, u
-%endif ; mmsize == 8/16
+%endif ; %1 == 8
 
     RET
 %endmacro
 
-%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
-INIT_MMX mmxext
-yuv2planeX_fn  8,  0, 7
-%endif
-
 INIT_XMM sse2
 yuv2planeX_fn  8, 10, 7
 yuv2planeX_fn  9,  7, 5
@@ -368,12 +327,12 @@ yuv2planeX_fn 10,  7, 5
 %if cpuflag(sse4) ; avx/sse4
     packusdw        m0, m1
     packusdw        m2, m3
-%else ; mmx/sse2
+%else ; sse2
     packssdw        m0, m1
     packssdw        m2, m3
     paddw           m0, m5
     paddw           m2, m5
-%endif ; mmx/sse2/sse4/avx
+%endif ; sse2/sse4/avx
     mov%2    [dstq+wq*2+mmsize*0], m0
     mov%2    [dstq+wq*2+mmsize*1], m2
 %else ; %1 == 9/10
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index f3aaa704f6..9bd4f526ba 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -267,7 +267,6 @@ void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
     VSCALEX_FUNC(9,  opt); \
     VSCALEX_FUNC(10, opt)
 
-VSCALEX_FUNC(8, mmxext);
 VSCALEX_FUNCS(sse2);
 VSCALEX_FUNCS(sse4);
 VSCALEX_FUNC(16, sse4);
@@ -509,14 +508,6 @@ av_cold void ff_sws_init_swscale_x86(SwsInternal *c)
             c->yuv2planeX = yuv2yuvX_avx2;
 #endif
     }
-#if ARCH_X86_32 && !HAVE_ALIGNED_STACK
-    // The better yuv2planeX_8 functions need aligned stack on x86-32,
-    // so we use MMXEXT in this case if they are not available.
-    if (EXTERNAL_MMXEXT(cpu_flags)) {
-        if (c->dstBpc == 8 && !c->use_mmx_vfilter)
-            c->yuv2planeX = ff_yuv2planeX_8_mmxext;
-    }
-#endif /* ARCH_X86_32 && !HAVE_ALIGNED_STACK */
 
 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
     if (c->srcBpc == 8) { \

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to