This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 93412ff6024c85958f49b40d1b04270f9552adcb Author: Andreas Rheinhardt <[email protected]> AuthorDate: Sun Jun 14 13:54:29 2026 +0200 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Wed Jun 17 14:33:28 2026 +0200 avcodec/x86/h264_intrapred: Avoid reg-reg moves Possible if src of the PRED4x4_LOWPASS macro is not used later on. Saves 195B of .text here. Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/x86/h264_intrapred.asm | 248 +++++++++++++++++++------------------- 1 file changed, 125 insertions(+), 123 deletions(-) diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm index 1074b474f0..ad4e267f03 100644 --- a/libavcodec/x86/h264_intrapred.asm +++ b/libavcodec/x86/h264_intrapred.asm @@ -795,7 +795,9 @@ cglobal pred8x8_tm_vp8_8, 2,3,6 mova %5, %2 pavgb %2, %3 pxor %3, %5 +%ifnidn %1, %4 mova %1, %4 +%endif pand %3, [pb_1] psubusb %2, %3 pavgb %1, %2 @@ -828,8 +830,8 @@ cglobal pred8x8l_top_dc_8, 4,4,6 pxor m1, m5 .has_topright: pxor m4, m4 - PRED4x4_LOWPASS m0, m2, m1, m3, m5 - psadbw m4, m0 + PRED4x4_LOWPASS m3, m2, m1, m3, m5 + psadbw m4, m3 paddw m4, [pw_4] psrlw m4, 3 SPLATW m4, m4, 0 @@ -901,9 +903,9 @@ cglobal pred8x8l_dc_8, 4,5 .do_left: movq mm0, mm4 PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 - movq mm4, mm0 + movq mm1, mm0 movq mm7, mm2 - PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 + PRED4x4_LOWPASS mm1, mm3, mm0, mm1, mm5 psllq mm1, 56 PALIGNR mm7, mm1, 7, mm3 movq mm0, [r0-8] @@ -919,12 +921,12 @@ cglobal pred8x8l_dc_8, 4,5 jz .fix_tr_1 .body: lea r1, [r0+r3*2] - PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 + PRED4x4_LOWPASS mm3, mm2, mm1, mm3, mm5 pxor mm0, mm0 pxor mm1, mm1 lea r2, [r1+r3*2] psadbw mm0, mm7 - psadbw mm1, mm6 + psadbw mm1, mm3 paddw mm0, [pw_8] paddw mm0, mm1 lea r4, [r2+r3*2] @@ -985,9 +987,9 @@ cglobal pred8x8l_horizontal_8, 4,4 PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 movq mm4, mm0 movq mm7, mm2 - PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 - psllq mm1, 56 - PALIGNR mm7, mm1, 7, mm3 + 
PRED4x4_LOWPASS mm4, mm3, mm0, mm4, mm5 + psllq mm4, 56 + PALIGNR mm7, mm4, 7, mm3 movq mm3, mm7 lea r1, [r0+r3*2] movq mm7, mm3 @@ -1054,14 +1056,14 @@ cglobal pred8x8l_vertical_8, 4,4 psllq mm5, 56 pxor mm1, mm5 .body: - PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5 + PRED4x4_LOWPASS mm3, mm2, mm1, mm3, mm5 %rep 3 - movq [r0+r3*1], mm0 - movq [r0+r3*2], mm0 + movq [r0+r3*1], mm3 + movq [r0+r3*2], mm3 lea r0, [r0+r3*2] %endrep - movq [r0+r3*1], mm0 - movq [r0+r3*2], mm0 + movq [r0+r3*1], mm3 + movq [r0+r3*2], mm3 RET %endmacro @@ -1114,14 +1116,14 @@ cglobal pred8x8l_down_left_8, 4,4 movq2dq xmm3, mm4 test r2d, r2d ; top_right jz .fix_tr_2 - movq mm0, [r0+8] - movq mm5, mm0 - movq mm2, mm0 - movq mm4, mm0 + movq mm1, [r0+8] + movq mm5, mm1 + movq mm2, mm1 + movq mm4, mm1 psrlq mm5, 56 PALIGNR mm2, mm3, 7, mm3 PALIGNR mm5, mm4, 1, mm4 - PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 + PRED4x4_LOWPASS mm1, mm2, mm5, mm1, mm4 .do_topright: movq2dq xmm4, mm1 psrlq mm1, 56 @@ -1137,24 +1139,24 @@ cglobal pred8x8l_down_left_8, 4,4 movdqa xmm1, xmm3 pslldq xmm1, 1 INIT_XMM cpuname - PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4 - psrldq xmm0, 1 - movq [r0+r3*1], xmm0 - psrldq xmm0, 1 - movq [r0+r3*2], xmm0 - psrldq xmm0, 1 + PRED4x4_LOWPASS xmm3, xmm1, xmm2, xmm3, xmm4 + psrldq xmm3, 1 + movq [r0+r3*1], xmm3 + psrldq xmm3, 1 + movq [r0+r3*2], xmm3 + psrldq xmm3, 1 lea r0, [r2+r3*2] - movq [r1+r3*1], xmm0 - psrldq xmm0, 1 - movq [r1+r3*2], xmm0 - psrldq xmm0, 1 - movq [r2+r3*1], xmm0 - psrldq xmm0, 1 - movq [r2+r3*2], xmm0 - psrldq xmm0, 1 - movq [r0+r3*1], xmm0 - psrldq xmm0, 1 - movq [r0+r3*2], xmm0 + movq [r1+r3*1], xmm3 + psrldq xmm3, 1 + movq [r1+r3*2], xmm3 + psrldq xmm3, 1 + movq [r2+r3*1], xmm3 + psrldq xmm3, 1 + movq [r2+r3*2], xmm3 + psrldq xmm3, 1 + movq [r0+r3*1], xmm3 + psrldq xmm3, 1 + movq [r0+r3*2], xmm3 RET %endmacro @@ -1222,10 +1224,10 @@ cglobal pred8x8l_down_right_8, 4,5 .do_left: movq mm0, mm4 PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 - movq mm4, mm0 + movq mm1, mm0 
movq mm7, mm2 movq2dq xmm3, mm2 - PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 + PRED4x4_LOWPASS mm1, mm3, mm0, mm1, mm5 psllq mm1, 56 PALIGNR mm7, mm1, 7, mm3 movq2dq xmm1, mm7 @@ -1241,8 +1243,8 @@ cglobal pred8x8l_down_right_8, 4,5 test r2d, r2d jz .fix_tr_1 .do_top: - PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 - movq2dq xmm4, mm4 + PRED4x4_LOWPASS mm3, mm2, mm1, mm3, mm5 + movq2dq xmm4, mm3 lea r1, [r0+r3*2] movdqa xmm0, xmm3 pslldq xmm4, 8 @@ -1258,22 +1260,22 @@ cglobal pred8x8l_down_right_8, 4,5 movdqa xmm2, xmm3 psrldq xmm2, 1 INIT_XMM cpuname - PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4 - movdqa xmm1, xmm0 + PRED4x4_LOWPASS xmm3, xmm1, xmm2, xmm3, xmm4 + movdqa xmm1, xmm3 psrldq xmm1, 1 - movq [r0+r3*2], xmm0 + movq [r0+r3*2], xmm3 movq [r0+r3*1], xmm1 - psrldq xmm0, 2 + psrldq xmm3, 2 psrldq xmm1, 2 - movq [r2+r3*2], xmm0 + movq [r2+r3*2], xmm3 movq [r2+r3*1], xmm1 - psrldq xmm0, 2 + psrldq xmm3, 2 psrldq xmm1, 2 - movq [r1+r3*2], xmm0 + movq [r1+r3*2], xmm3 movq [r1+r3*1], xmm1 - psrldq xmm0, 2 + psrldq xmm3, 2 psrldq xmm1, 2 - movq [r4+r3*2], xmm0 + movq [r4+r3*2], xmm3 movq [r4+r3*1], xmm1 RET %endmacro @@ -1340,8 +1342,8 @@ cglobal pred8x8l_vertical_right_8, 4,5,6 jmp .do_top .do_left: movq mm0, mm4 - PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 - movq2dq xmm0, mm2 + PRED4x4_LOWPASS mm3, mm1, mm4, mm3, mm5 + movq2dq xmm0, mm3 movq mm0, [r0-8] movq mm3, [r0] movq mm1, [r0+8] @@ -1354,9 +1356,9 @@ cglobal pred8x8l_vertical_right_8, 4,5,6 test r2d, r2d jz .fix_tr_1 .do_top: - PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 + PRED4x4_LOWPASS mm3, mm2, mm1, mm3, mm5 lea r1, [r0+r3*2] - movq2dq xmm4, mm6 + movq2dq xmm4, mm3 pslldq xmm4, 8 por xmm0, xmm4 movdqa xmm1, xmm0 @@ -1367,19 +1369,19 @@ cglobal pred8x8l_vertical_right_8, 4,5,6 pslldq xmm1, 2 pavgb xmm2, xmm0 INIT_XMM cpuname - PRED4x4_LOWPASS xmm4, xmm3, xmm1, xmm0, xmm5 - movdqa xmm0, [pw_ff00] - pandn xmm0, xmm4 - movdqa xmm5, xmm4 - psrlw xmm4, 8 - packuswb xmm0, xmm4 - movhlps xmm4, xmm0 + PRED4x4_LOWPASS xmm0, 
xmm3, xmm1, xmm0, xmm5 + movdqa xmm4, [pw_ff00] + pandn xmm4, xmm0 + movdqa xmm5, xmm0 + psrlw xmm0, 8 + packuswb xmm4, xmm0 + movhlps xmm0, xmm4 movhps [r0+r3*2], xmm5 movhps [r0+r3*1], xmm2 psrldq xmm5, 4 - movss xmm5, xmm0 + movss xmm5, xmm4 psrldq xmm2, 4 - movss xmm2, xmm4 + movss xmm2, xmm0 lea r0, [r2+r3*2] psrldq xmm5, 1 psrldq xmm2, 1 @@ -1445,14 +1447,14 @@ cglobal pred8x8l_vertical_left_8, 4,4 movq2dq xmm4, mm4 test r2d, r2d jz .fix_tr_2 - movq mm0, [r0+8] - movq mm5, mm0 - movq mm2, mm0 - movq mm4, mm0 + movq mm1, [r0+8] + movq mm5, mm1 + movq mm2, mm1 + movq mm4, mm1 psrlq mm5, 56 PALIGNR mm2, mm3, 7, mm3 PALIGNR mm5, mm4, 1, mm4 - PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 + PRED4x4_LOWPASS mm1, mm2, mm5, mm1, mm4 .do_topright: movq2dq xmm3, mm1 lea r1, [r0+r3*2] @@ -1466,23 +1468,23 @@ cglobal pred8x8l_vertical_left_8, 4,4 pavgb xmm3, xmm2 lea r2, [r1+r3*2] INIT_XMM cpuname - PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm4, xmm5 - psrldq xmm0, 1 + PRED4x4_LOWPASS xmm4, xmm1, xmm2, xmm4, xmm5 + psrldq xmm4, 1 movq [r0+r3*1], xmm3 - movq [r0+r3*2], xmm0 + movq [r0+r3*2], xmm4 lea r0, [r2+r3*2] psrldq xmm3, 1 - psrldq xmm0, 1 + psrldq xmm4, 1 movq [r1+r3*1], xmm3 - movq [r1+r3*2], xmm0 + movq [r1+r3*2], xmm4 psrldq xmm3, 1 - psrldq xmm0, 1 + psrldq xmm4, 1 movq [r2+r3*1], xmm3 - movq [r2+r3*2], xmm0 + movq [r2+r3*2], xmm4 psrldq xmm3, 1 - psrldq xmm0, 1 + psrldq xmm4, 1 movq [r0+r3*1], xmm3 - movq [r0+r3*2], xmm0 + movq [r0+r3*2], xmm4 RET %endmacro @@ -1527,9 +1529,9 @@ cglobal pred8x8l_horizontal_up_8, 4,4 PALIGNR mm1, mm2, 1, mm2 movq mm0, mm4 PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 - movq mm4, mm0 + movq mm1, mm0 movq mm7, mm2 - PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 + PRED4x4_LOWPASS mm1, mm3, mm0, mm1, mm5 psllq mm1, 56 PALIGNR mm7, mm1, 7, mm3 lea r1, [r0+r3*2] @@ -1643,8 +1645,8 @@ cglobal pred8x8l_horizontal_down_8, 4,5 PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 movq2dq xmm0, mm2 pslldq xmm0, 8 - movq mm4, mm0 - PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 + movq 
mm1, mm0 + PRED4x4_LOWPASS mm1, mm3, mm0, mm1, mm5 movq2dq xmm2, mm1 pslldq xmm2, 15 psrldq xmm2, 8 @@ -1665,14 +1667,14 @@ cglobal pred8x8l_horizontal_down_8, 4,5 movq2dq xmm1, mm4 test r2d, r2d jz .fix_tr_2 - movq mm0, [r0+8] - movq mm5, mm0 - movq mm2, mm0 - movq mm4, mm0 + movq mm1, [r0+8] + movq mm5, mm1 + movq mm2, mm1 + movq mm4, mm1 psrlq mm5, 56 PALIGNR mm2, mm3, 7, mm3 PALIGNR mm5, mm4, 1, mm4 - PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 + PRED4x4_LOWPASS mm1, mm2, mm5, mm1, mm4 .do_topright: movq2dq xmm5, mm1 pslldq xmm5, 8 @@ -1688,23 +1690,23 @@ INIT_XMM cpuname movdqa xmm4, xmm1 pavgb xmm4, xmm3 lea r0, [r1+r3*2] - PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm5 - punpcklbw xmm4, xmm0 - movhlps xmm0, xmm4 + PRED4x4_LOWPASS xmm3, xmm1, xmm2, xmm3, xmm5 + punpcklbw xmm4, xmm3 + movhlps xmm3, xmm4 movq [r0+r3*2], xmm4 - movq [r2+r3*2], xmm0 + movq [r2+r3*2], xmm3 psrldq xmm4, 2 - psrldq xmm0, 2 + psrldq xmm3, 2 movq [r0+r3*1], xmm4 - movq [r2+r3*1], xmm0 + movq [r2+r3*1], xmm3 psrldq xmm4, 2 - psrldq xmm0, 2 + psrldq xmm3, 2 movq [r1+r3*2], xmm4 - movq [r4+r3*2], xmm0 + movq [r4+r3*2], xmm3 psrldq xmm4, 2 - psrldq xmm0, 2 + psrldq xmm3, 2 movq [r1+r3*1], xmm4 - movq [r4+r3*1], xmm0 + movq [r4+r3*1], xmm3 RET %endmacro @@ -1824,11 +1826,11 @@ cglobal pred4x4_vertical_vp8_8, 3,3 punpckldq m0, [r1] ;t0 t1 t2 t3 t4 t5 t6 t7 lea r1, [r0+r2*2] psrlq m0, 8 ;t1 t2 t3 t4 - PRED4x4_LOWPASS m3, m1, m0, m2, m4 - movd [r0+r2*1], m3 - movd [r0+r2*2], m3 - movd [r1+r2*1], m3 - movd [r1+r2*2], m3 + PRED4x4_LOWPASS m2, m1, m0, m2, m4 + movd [r0+r2*1], m2 + movd [r0+r2*2], m2 + movd [r1+r2*1], m2 + movd [r1+r2*2], m2 RET ;----------------------------------------------------------------------------- @@ -1841,12 +1843,12 @@ cglobal pred4x4_down_left_8, 3,3 movq m1, [r0] punpckldq m1, [r1] movq m2, m1 - movq m3, m1 + movq m0, m1 psllq m1, 8 pxor m2, m1 psrlq m2, 8 - pxor m2, m3 - PRED4x4_LOWPASS m0, m1, m2, m3, m4 + pxor m2, m0 + PRED4x4_LOWPASS m0, m1, m2, m0, m3 lea r1, 
[r0+r2*2] psrlq m0, 8 movd [r0+r2*1], m0 @@ -1868,13 +1870,13 @@ cglobal pred4x4_vertical_left_8, 3,3 sub r0, r2 movq m1, [r0] punpckldq m1, [r1] - movq m3, m1 + movq m0, m1 movq m2, m1 - psrlq m3, 8 + psrlq m0, 8 psrlq m2, 16 - movq m4, m3 + movq m4, m0 pavgb m4, m1 - PRED4x4_LOWPASS m0, m1, m2, m3, m5 + PRED4x4_LOWPASS m0, m1, m2, m0, m5 lea r1, [r0+r2*2] movh [r0+r2*1], m4 movh [r0+r2*2], m0 @@ -1908,8 +1910,8 @@ cglobal pred4x4_horizontal_up_8, 3,3 psrlq m2, 16 psrlq m3, 8 pavgb m7, m3 - PRED4x4_LOWPASS m4, m0, m2, m3, m5 - punpcklbw m7, m4 + PRED4x4_LOWPASS m3, m0, m2, m3, m5 + punpcklbw m7, m3 movd [r0+r2*1], m7 psrlq m7, 16 movd [r0+r2*2], m7 @@ -1943,16 +1945,16 @@ cglobal pred4x4_horizontal_down_8, 3,3 psrlq m0, 16 ; .. .. t2 t1 t0 lt l0 l1 psrlq m2, 8 ; .. t2 t1 t0 lt l0 l1 l2 pavgb m5, m2 - PRED4x4_LOWPASS m3, m1, m0, m2, m4 - punpcklbw m5, m3 - psrlq m3, 32 - PALIGNR m3, m5, 6, m4 + PRED4x4_LOWPASS m2, m1, m0, m2, m4 + punpcklbw m5, m2 + psrlq m2, 32 + PALIGNR m2, m5, 6, m4 movh [r1+r2*2], m5 psrlq m5, 16 movh [r1+r2*1], m5 psrlq m5, 16 movh [r0+r2*2], m5 - movh [r0+r2*1], m3 + movh [r0+r2*1], m2 RET ;----------------------------------------------------------------------------- @@ -1974,17 +1976,17 @@ cglobal pred4x4_vertical_right_8, 3,3 PALIGNR m0, [r0+r2*2-8], 7, m2 ; ..t3t2t1t0ltl0l1 movq m2, m0 PALIGNR m0, [r1+r2*1-8], 7, m3 ; t3t2t1t0ltl0l1l2 - PRED4x4_LOWPASS m3, m1, m0, m2, m4 - movq m1, m3 - psrlq m3, 16 + PRED4x4_LOWPASS m2, m1, m0, m2, m4 + movq m1, m2 + psrlq m2, 16 psllq m1, 48 movh [r0+r2*1], m5 - movh [r0+r2*2], m3 - PALIGNR m5, m1, 7, m2 + movh [r0+r2*2], m2 + PALIGNR m5, m1, 7, m3 psllq m1, 8 movh [r1+r2*1], m5 - PALIGNR m3, m1, 7, m1 - movh [r1+r2*2], m3 + PALIGNR m2, m1, 7, m1 + movh [r1+r2*2], m2 RET ;----------------------------------------------------------------------------- @@ -2004,9 +2006,9 @@ cglobal pred4x4_down_right_8, 3,3 PALIGNR m3, m1, 5, m1 movq m1, m3 PALIGNR m3, [r1+r2*1-8], 7, m4 - movq m2, m3 + movq m0, m3 PALIGNR 
m3, [r1+r2*2-8], 7, m4 - PRED4x4_LOWPASS m0, m3, m1, m2, m4 + PRED4x4_LOWPASS m0, m3, m1, m0, m4 movh [r1+r2*2], m0 psrlq m0, 8 movh [r1+r2*1], m0 _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
