This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 93412ff6024c85958f49b40d1b04270f9552adcb
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sun Jun 14 13:54:29 2026 +0200
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Wed Jun 17 14:33:28 2026 +0200

    avcodec/x86/h264_intrapred: Avoid reg-reg moves
    
    Possible if the src of the PRED4x4_LOWPASS macro is not used later on.
    Saves 195B of .text here.
    
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_intrapred.asm | 248 +++++++++++++++++++-------------------
 1 file changed, 125 insertions(+), 123 deletions(-)

diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 1074b474f0..ad4e267f03 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -795,7 +795,9 @@ cglobal pred8x8_tm_vp8_8, 2,3,6
     mova    %5, %2
     pavgb   %2, %3
     pxor    %3, %5
+%ifnidn %1, %4
     mova    %1, %4
+%endif
     pand    %3, [pb_1]
     psubusb %2, %3
     pavgb   %1, %2
@@ -828,8 +830,8 @@ cglobal pred8x8l_top_dc_8, 4,4,6
     pxor         m1, m5
 .has_topright:
     pxor     m4, m4
-    PRED4x4_LOWPASS m0, m2, m1, m3, m5
-    psadbw   m4, m0
+    PRED4x4_LOWPASS m3, m2, m1, m3, m5
+    psadbw   m4, m3
     paddw    m4, [pw_4]
     psrlw    m4, 3
     SPLATW   m4, m4, 0
@@ -901,9 +903,9 @@ cglobal pred8x8l_dc_8, 4,5
 .do_left:
     movq        mm0, mm4
     PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
-    movq        mm4, mm0
+    movq        mm1, mm0
     movq        mm7, mm2
-    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm1, mm5
     psllq       mm1, 56
     PALIGNR     mm7, mm1, 7, mm3
     movq        mm0, [r0-8]
@@ -919,12 +921,12 @@ cglobal pred8x8l_dc_8, 4,5
     jz .fix_tr_1
 .body:
     lea          r1, [r0+r3*2]
-    PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5
+    PRED4x4_LOWPASS mm3, mm2, mm1, mm3, mm5
     pxor        mm0, mm0
     pxor        mm1, mm1
     lea          r2, [r1+r3*2]
     psadbw      mm0, mm7
-    psadbw      mm1, mm6
+    psadbw      mm1, mm3
     paddw       mm0, [pw_8]
     paddw       mm0, mm1
     lea          r4, [r2+r3*2]
@@ -985,9 +987,9 @@ cglobal pred8x8l_horizontal_8, 4,4
     PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
     movq        mm4, mm0
     movq        mm7, mm2
-    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
-    psllq       mm1, 56
-    PALIGNR     mm7, mm1, 7, mm3
+    PRED4x4_LOWPASS mm4, mm3, mm0, mm4, mm5
+    psllq       mm4, 56
+    PALIGNR     mm7, mm4, 7, mm3
     movq        mm3, mm7
     lea         r1, [r0+r3*2]
     movq       mm7, mm3
@@ -1054,14 +1056,14 @@ cglobal pred8x8l_vertical_8, 4,4
     psllq       mm5, 56
     pxor        mm1, mm5
 .body:
-    PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5
+    PRED4x4_LOWPASS mm3, mm2, mm1, mm3, mm5
 %rep 3
-    movq [r0+r3*1], mm0
-    movq [r0+r3*2], mm0
+    movq [r0+r3*1], mm3
+    movq [r0+r3*2], mm3
     lea    r0, [r0+r3*2]
 %endrep
-    movq [r0+r3*1], mm0
-    movq [r0+r3*2], mm0
+    movq [r0+r3*1], mm3
+    movq [r0+r3*2], mm3
     RET
 %endmacro
 
@@ -1114,14 +1116,14 @@ cglobal pred8x8l_down_left_8, 4,4
     movq2dq    xmm3, mm4
     test        r2d, r2d ; top_right
     jz .fix_tr_2
-    movq        mm0, [r0+8]
-    movq        mm5, mm0
-    movq        mm2, mm0
-    movq        mm4, mm0
+    movq        mm1, [r0+8]
+    movq        mm5, mm1
+    movq        mm2, mm1
+    movq        mm4, mm1
     psrlq       mm5, 56
     PALIGNR     mm2, mm3, 7, mm3
     PALIGNR     mm5, mm4, 1, mm4
-    PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4
+    PRED4x4_LOWPASS mm1, mm2, mm5, mm1, mm4
 .do_topright:
     movq2dq    xmm4, mm1
     psrlq       mm1, 56
@@ -1137,24 +1139,24 @@ cglobal pred8x8l_down_left_8, 4,4
     movdqa    xmm1, xmm3
     pslldq    xmm1, 1
 INIT_XMM cpuname
-    PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4
-    psrldq    xmm0, 1
-    movq [r0+r3*1], xmm0
-    psrldq    xmm0, 1
-    movq [r0+r3*2], xmm0
-    psrldq    xmm0, 1
+    PRED4x4_LOWPASS xmm3, xmm1, xmm2, xmm3, xmm4
+    psrldq    xmm3, 1
+    movq [r0+r3*1], xmm3
+    psrldq    xmm3, 1
+    movq [r0+r3*2], xmm3
+    psrldq    xmm3, 1
     lea         r0, [r2+r3*2]
-    movq [r1+r3*1], xmm0
-    psrldq    xmm0, 1
-    movq [r1+r3*2], xmm0
-    psrldq    xmm0, 1
-    movq [r2+r3*1], xmm0
-    psrldq    xmm0, 1
-    movq [r2+r3*2], xmm0
-    psrldq    xmm0, 1
-    movq [r0+r3*1], xmm0
-    psrldq    xmm0, 1
-    movq [r0+r3*2], xmm0
+    movq [r1+r3*1], xmm3
+    psrldq    xmm3, 1
+    movq [r1+r3*2], xmm3
+    psrldq    xmm3, 1
+    movq [r2+r3*1], xmm3
+    psrldq    xmm3, 1
+    movq [r2+r3*2], xmm3
+    psrldq    xmm3, 1
+    movq [r0+r3*1], xmm3
+    psrldq    xmm3, 1
+    movq [r0+r3*2], xmm3
     RET
 %endmacro
 
@@ -1222,10 +1224,10 @@ cglobal pred8x8l_down_right_8, 4,5
 .do_left:
     movq        mm0, mm4
     PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
-    movq        mm4, mm0
+    movq        mm1, mm0
     movq        mm7, mm2
     movq2dq    xmm3, mm2
-    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm1, mm5
     psllq       mm1, 56
     PALIGNR     mm7, mm1, 7, mm3
     movq2dq    xmm1, mm7
@@ -1241,8 +1243,8 @@ cglobal pred8x8l_down_right_8, 4,5
     test        r2d, r2d
     jz .fix_tr_1
 .do_top:
-    PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5
-    movq2dq   xmm4, mm4
+    PRED4x4_LOWPASS mm3, mm2, mm1, mm3, mm5
+    movq2dq   xmm4, mm3
     lea         r1, [r0+r3*2]
     movdqa    xmm0, xmm3
     pslldq    xmm4, 8
@@ -1258,22 +1260,22 @@ cglobal pred8x8l_down_right_8, 4,5
     movdqa    xmm2, xmm3
     psrldq    xmm2, 1
 INIT_XMM cpuname
-    PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4
-    movdqa    xmm1, xmm0
+    PRED4x4_LOWPASS xmm3, xmm1, xmm2, xmm3, xmm4
+    movdqa    xmm1, xmm3
     psrldq    xmm1, 1
-    movq [r0+r3*2], xmm0
+    movq [r0+r3*2], xmm3
     movq [r0+r3*1], xmm1
-    psrldq    xmm0, 2
+    psrldq    xmm3, 2
     psrldq    xmm1, 2
-    movq [r2+r3*2], xmm0
+    movq [r2+r3*2], xmm3
     movq [r2+r3*1], xmm1
-    psrldq    xmm0, 2
+    psrldq    xmm3, 2
     psrldq    xmm1, 2
-    movq [r1+r3*2], xmm0
+    movq [r1+r3*2], xmm3
     movq [r1+r3*1], xmm1
-    psrldq    xmm0, 2
+    psrldq    xmm3, 2
     psrldq    xmm1, 2
-    movq [r4+r3*2], xmm0
+    movq [r4+r3*2], xmm3
     movq [r4+r3*1], xmm1
     RET
 %endmacro
@@ -1340,8 +1342,8 @@ cglobal pred8x8l_vertical_right_8, 4,5,6
     jmp .do_top
 .do_left:
     movq        mm0, mm4
-    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
-    movq2dq    xmm0, mm2
+    PRED4x4_LOWPASS mm3, mm1, mm4, mm3, mm5
+    movq2dq    xmm0, mm3
     movq        mm0, [r0-8]
     movq        mm3, [r0]
     movq        mm1, [r0+8]
@@ -1354,9 +1356,9 @@ cglobal pred8x8l_vertical_right_8, 4,5,6
     test        r2d, r2d
     jz .fix_tr_1
 .do_top:
-    PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5
+    PRED4x4_LOWPASS mm3, mm2, mm1, mm3, mm5
     lea           r1, [r0+r3*2]
-    movq2dq     xmm4, mm6
+    movq2dq     xmm4, mm3
     pslldq      xmm4, 8
     por         xmm0, xmm4
     movdqa      xmm1, xmm0
@@ -1367,19 +1369,19 @@ cglobal pred8x8l_vertical_right_8, 4,5,6
     pslldq      xmm1, 2
     pavgb       xmm2, xmm0
 INIT_XMM cpuname
-    PRED4x4_LOWPASS xmm4, xmm3, xmm1, xmm0, xmm5
-    movdqa      xmm0, [pw_ff00]
-    pandn       xmm0, xmm4
-    movdqa      xmm5, xmm4
-    psrlw       xmm4, 8
-    packuswb    xmm0, xmm4
-    movhlps     xmm4, xmm0
+    PRED4x4_LOWPASS xmm0, xmm3, xmm1, xmm0, xmm5
+    movdqa      xmm4, [pw_ff00]
+    pandn       xmm4, xmm0
+    movdqa      xmm5, xmm0
+    psrlw       xmm0, 8
+    packuswb    xmm4, xmm0
+    movhlps     xmm0, xmm4
     movhps [r0+r3*2], xmm5
     movhps [r0+r3*1], xmm2
     psrldq      xmm5, 4
-    movss       xmm5, xmm0
+    movss       xmm5, xmm4
     psrldq      xmm2, 4
-    movss       xmm2, xmm4
+    movss       xmm2, xmm0
     lea           r0, [r2+r3*2]
     psrldq      xmm5, 1
     psrldq      xmm2, 1
@@ -1445,14 +1447,14 @@ cglobal pred8x8l_vertical_left_8, 4,4
     movq2dq    xmm4, mm4
     test        r2d, r2d
     jz .fix_tr_2
-    movq        mm0, [r0+8]
-    movq        mm5, mm0
-    movq        mm2, mm0
-    movq        mm4, mm0
+    movq        mm1, [r0+8]
+    movq        mm5, mm1
+    movq        mm2, mm1
+    movq        mm4, mm1
     psrlq       mm5, 56
     PALIGNR     mm2, mm3, 7, mm3
     PALIGNR     mm5, mm4, 1, mm4
-    PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4
+    PRED4x4_LOWPASS mm1, mm2, mm5, mm1, mm4
 .do_topright:
     movq2dq   xmm3, mm1
     lea         r1, [r0+r3*2]
@@ -1466,23 +1468,23 @@ cglobal pred8x8l_vertical_left_8, 4,4
     pavgb     xmm3, xmm2
     lea         r2, [r1+r3*2]
 INIT_XMM cpuname
-    PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm4, xmm5
-    psrldq    xmm0, 1
+    PRED4x4_LOWPASS xmm4, xmm1, xmm2, xmm4, xmm5
+    psrldq    xmm4, 1
     movq [r0+r3*1], xmm3
-    movq [r0+r3*2], xmm0
+    movq [r0+r3*2], xmm4
     lea         r0, [r2+r3*2]
     psrldq    xmm3, 1
-    psrldq    xmm0, 1
+    psrldq    xmm4, 1
     movq [r1+r3*1], xmm3
-    movq [r1+r3*2], xmm0
+    movq [r1+r3*2], xmm4
     psrldq    xmm3, 1
-    psrldq    xmm0, 1
+    psrldq    xmm4, 1
     movq [r2+r3*1], xmm3
-    movq [r2+r3*2], xmm0
+    movq [r2+r3*2], xmm4
     psrldq    xmm3, 1
-    psrldq    xmm0, 1
+    psrldq    xmm4, 1
     movq [r0+r3*1], xmm3
-    movq [r0+r3*2], xmm0
+    movq [r0+r3*2], xmm4
     RET
 %endmacro
 
@@ -1527,9 +1529,9 @@ cglobal pred8x8l_horizontal_up_8, 4,4
     PALIGNR     mm1, mm2, 1, mm2
     movq       mm0, mm4
     PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
-    movq       mm4, mm0
+    movq       mm1, mm0
     movq       mm7, mm2
-    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm1, mm5
     psllq      mm1, 56
     PALIGNR    mm7, mm1, 7, mm3
     lea         r1, [r0+r3*2]
@@ -1643,8 +1645,8 @@ cglobal pred8x8l_horizontal_down_8, 4,5
     PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
     movq2dq    xmm0, mm2
     pslldq     xmm0, 8
-    movq        mm4, mm0
-    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+    movq        mm1, mm0
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm1, mm5
     movq2dq    xmm2, mm1
     pslldq     xmm2, 15
     psrldq     xmm2, 8
@@ -1665,14 +1667,14 @@ cglobal pred8x8l_horizontal_down_8, 4,5
     movq2dq    xmm1, mm4
     test        r2d, r2d
     jz .fix_tr_2
-    movq        mm0, [r0+8]
-    movq        mm5, mm0
-    movq        mm2, mm0
-    movq        mm4, mm0
+    movq        mm1, [r0+8]
+    movq        mm5, mm1
+    movq        mm2, mm1
+    movq        mm4, mm1
     psrlq       mm5, 56
     PALIGNR     mm2, mm3, 7, mm3
     PALIGNR     mm5, mm4, 1, mm4
-    PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4
+    PRED4x4_LOWPASS mm1, mm2, mm5, mm1, mm4
 .do_topright:
     movq2dq    xmm5, mm1
     pslldq     xmm5, 8
@@ -1688,23 +1690,23 @@ INIT_XMM cpuname
     movdqa    xmm4, xmm1
     pavgb     xmm4, xmm3
     lea         r0, [r1+r3*2]
-    PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm5
-    punpcklbw xmm4, xmm0
-    movhlps   xmm0, xmm4
+    PRED4x4_LOWPASS xmm3, xmm1, xmm2, xmm3, xmm5
+    punpcklbw xmm4, xmm3
+    movhlps   xmm3, xmm4
     movq   [r0+r3*2], xmm4
-    movq   [r2+r3*2], xmm0
+    movq   [r2+r3*2], xmm3
     psrldq xmm4, 2
-    psrldq xmm0, 2
+    psrldq xmm3, 2
     movq   [r0+r3*1], xmm4
-    movq   [r2+r3*1], xmm0
+    movq   [r2+r3*1], xmm3
     psrldq xmm4, 2
-    psrldq xmm0, 2
+    psrldq xmm3, 2
     movq   [r1+r3*2], xmm4
-    movq   [r4+r3*2], xmm0
+    movq   [r4+r3*2], xmm3
     psrldq xmm4, 2
-    psrldq xmm0, 2
+    psrldq xmm3, 2
     movq   [r1+r3*1], xmm4
-    movq   [r4+r3*1], xmm0
+    movq   [r4+r3*1], xmm3
     RET
 %endmacro
 
@@ -1824,11 +1826,11 @@ cglobal pred4x4_vertical_vp8_8, 3,3
     punpckldq m0, [r1] ;t0 t1 t2 t3 t4 t5 t6 t7
     lea       r1, [r0+r2*2]
     psrlq     m0, 8    ;t1 t2 t3 t4
-    PRED4x4_LOWPASS m3, m1, m0, m2, m4
-    movd [r0+r2*1], m3
-    movd [r0+r2*2], m3
-    movd [r1+r2*1], m3
-    movd [r1+r2*2], m3
+    PRED4x4_LOWPASS m2, m1, m0, m2, m4
+    movd [r0+r2*1], m2
+    movd [r0+r2*2], m2
+    movd [r1+r2*1], m2
+    movd [r1+r2*2], m2
     RET
 
 ;-----------------------------------------------------------------------------
@@ -1841,12 +1843,12 @@ cglobal pred4x4_down_left_8, 3,3
     movq      m1, [r0]
     punpckldq m1, [r1]
     movq      m2, m1
-    movq      m3, m1
+    movq      m0, m1
     psllq     m1, 8
     pxor      m2, m1
     psrlq     m2, 8
-    pxor      m2, m3
-    PRED4x4_LOWPASS m0, m1, m2, m3, m4
+    pxor      m2, m0
+    PRED4x4_LOWPASS m0, m1, m2, m0, m3
     lea       r1, [r0+r2*2]
     psrlq     m0, 8
     movd      [r0+r2*1], m0
@@ -1868,13 +1870,13 @@ cglobal pred4x4_vertical_left_8, 3,3
     sub       r0, r2
     movq      m1, [r0]
     punpckldq m1, [r1]
-    movq      m3, m1
+    movq      m0, m1
     movq      m2, m1
-    psrlq     m3, 8
+    psrlq     m0, 8
     psrlq     m2, 16
-    movq      m4, m3
+    movq      m4, m0
     pavgb     m4, m1
-    PRED4x4_LOWPASS m0, m1, m2, m3, m5
+    PRED4x4_LOWPASS m0, m1, m2, m0, m5
     lea       r1, [r0+r2*2]
     movh      [r0+r2*1], m4
     movh      [r0+r2*2], m0
@@ -1908,8 +1910,8 @@ cglobal pred4x4_horizontal_up_8, 3,3
     psrlq     m2, 16
     psrlq     m3, 8
     pavgb     m7, m3
-    PRED4x4_LOWPASS m4, m0, m2, m3, m5
-    punpcklbw m7, m4
+    PRED4x4_LOWPASS m3, m0, m2, m3, m5
+    punpcklbw m7, m3
     movd    [r0+r2*1], m7
     psrlq    m7, 16
     movd    [r0+r2*2], m7
@@ -1943,16 +1945,16 @@ cglobal pred4x4_horizontal_down_8, 3,3
     psrlq     m0, 16          ; .. .. t2 t1 t0 lt l0 l1
     psrlq     m2, 8           ; .. t2 t1 t0 lt l0 l1 l2
     pavgb     m5, m2
-    PRED4x4_LOWPASS m3, m1, m0, m2, m4
-    punpcklbw m5, m3
-    psrlq     m3, 32
-    PALIGNR   m3, m5, 6, m4
+    PRED4x4_LOWPASS m2, m1, m0, m2, m4
+    punpcklbw m5, m2
+    psrlq     m2, 32
+    PALIGNR   m2, m5, 6, m4
     movh      [r1+r2*2], m5
     psrlq     m5, 16
     movh      [r1+r2*1], m5
     psrlq     m5, 16
     movh      [r0+r2*2], m5
-    movh      [r0+r2*1], m3
+    movh      [r0+r2*1], m2
     RET
 
 ;-----------------------------------------------------------------------------
@@ -1974,17 +1976,17 @@ cglobal pred4x4_vertical_right_8, 3,3
     PALIGNR m0, [r0+r2*2-8], 7, m2      ; ..t3t2t1t0ltl0l1
     movq    m2, m0
     PALIGNR m0, [r1+r2*1-8], 7, m3      ; t3t2t1t0ltl0l1l2
-    PRED4x4_LOWPASS m3, m1, m0, m2, m4
-    movq    m1, m3
-    psrlq   m3, 16
+    PRED4x4_LOWPASS m2, m1, m0, m2, m4
+    movq    m1, m2
+    psrlq   m2, 16
     psllq   m1, 48
     movh    [r0+r2*1], m5
-    movh    [r0+r2*2], m3
-    PALIGNR m5, m1, 7, m2
+    movh    [r0+r2*2], m2
+    PALIGNR m5, m1, 7, m3
     psllq   m1, 8
     movh    [r1+r2*1], m5
-    PALIGNR m3, m1, 7, m1
-    movh    [r1+r2*2], m3
+    PALIGNR m2, m1, 7, m1
+    movh    [r1+r2*2], m2
     RET
 
 ;-----------------------------------------------------------------------------
@@ -2004,9 +2006,9 @@ cglobal pred4x4_down_right_8, 3,3
     PALIGNR   m3, m1, 5, m1
     movq      m1, m3
     PALIGNR   m3, [r1+r2*1-8], 7, m4
-    movq      m2, m3
+    movq      m0, m3
     PALIGNR   m3, [r1+r2*2-8], 7, m4
-    PRED4x4_LOWPASS m0, m3, m1, m2, m4
+    PRED4x4_LOWPASS m0, m3, m1, m0, m4
     movh      [r1+r2*2], m0
     psrlq     m0, 8
     movh      [r1+r2*1], m0

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to