This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 8a7c1f7fb8d70337e736e72cdc1e8edf4981e307
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Fri Apr 10 03:07:13 2026 +0200
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Mon Apr 13 08:46:44 2026 +0200

    swscale/x86/output: Make xmm functions usable even without aligned stack

    x86-32 lacks one GPR, so it needs to be read from the stack.
    If the stack needs to be realigned, we can no longer access
    the original location of one argument, so just request a bit more
    stack size and copy said argument at a fixed offset from the new stack.

    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libswscale/x86/output.asm | 31 +++++++++++++++++++------------
 libswscale/x86/swscale.c  | 13 +++++--------
 2 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 7d87dd919c..bbe15510f8 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -137,7 +137,11 @@ SECTION .text
     mova m1, [yuv2yuvX_%1_start]
     mova m2, m1
 %endif ; %1 == 8/9/10/16
+%if ARCH_X86_32 && !HAVE_ALIGNED_STACK && (%1 == 8)
+    mov cntr_reg, [rsp+32]
+%else
     movsx cntr_reg, fltsizem
+%endif
 .filterloop_%2_ %+ %%i:
     ; input pixels
     mov r6, [srcq+gprsize*cntr_reg-2*gprsize]
@@ -233,15 +237,27 @@ SECTION .text
 %define movsx movsxd
 %endif
 
-cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
+%if %1 == 8
+%assign STACK_SIZE ARCH_X86_32*(32+mmsize*!HAVE_ALIGNED_STACK)
+%else
+%assign STACK_SIZE 0
+%endif
+
+cglobal yuv2planeX_%1, %3, 8, %2, -STACK_SIZE, filter, fltsize, src, dst, w, dither, offset
 %if %1 == 8 || %1 == 9 || %1 == 10
     pxor m6, m6
 %endif ; %1 == 8/9/10
 
 %if %1 == 8
 %if ARCH_X86_32
-%assign pad 0x2c - (stack_offset & 15)
-    SUB rsp, pad
+%if !HAVE_ALIGNED_STACK
+    ; For 8-bit content on x86-32 we need the stack for both vector and GP regs.
+    ; If the stack is not suitably aligned, then x86inc aligns it for us, but
+    ; we can then no longer access the original location of fltsize, so copy
+    ; it here at a known offset of rsp.
+    mov [rsp+32], fltsized
+%endif
+
 %define m_dith m7
 %else ; x86-64
 %define m_dith m9
@@ -304,16 +320,7 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
     yuv2planeX_mainloop %1, u
 %endif ; mmsize == 8/16
 
-%if %1 == 8
-%if ARCH_X86_32
-    ADD rsp, pad
-    RET
-%else ; x86-64
     RET
-%endif ; x86-32/64
-%else ; %1 == 9/10/16
-    RET
-%endif ; %1 == 8/9/10/16
 %endmacro
 
 %if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 85faf92c56..f3aaa704f6 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -540,12 +540,12 @@ av_cold void ff_sws_init_swscale_x86(SwsInternal *c)
                 ff_hscale16to19_ ## filtersize ## _ ## opt1; \
         } \
     } while (0)
-#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
+#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case) \
 switch(c->dstBpc){ \
     case 16: do_16_case; break; \
     case 10: if (!isBE(c->opts.dst_format) && !isSemiPlanarYUV(c->opts.dst_format) && !isDataInHighBits(c->opts.dst_format)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
     case 9: if (!isBE(c->opts.dst_format)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
-    case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
+    case 8: if (!c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
     }
 #define ASSIGN_VSCALE_FUNC(vscalefn, opt) \
     switch(c->dstBpc){ \
@@ -572,8 +572,7 @@ switch(c->dstBpc){ \
     if (EXTERNAL_SSE2(cpu_flags)) {
         ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
         ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
-        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
-                            HAVE_ALIGNED_STACK || ARCH_X86_64);
+        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, );
         if (!(c->opts.flags & SWS_ACCURATE_RND))
             ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2);
 
@@ -622,15 +621,13 @@ switch(c->dstBpc){ \
         ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
         ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
         ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
-                            if (!isBE(c->opts.dst_format)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
-                            HAVE_ALIGNED_STACK || ARCH_X86_64);
+                            if (!isBE(c->opts.dst_format)) c->yuv2planeX = ff_yuv2planeX_16_sse4);
         if (c->dstBpc == 16 && !isBE(c->opts.dst_format) && !(c->opts.flags & SWS_ACCURATE_RND))
             c->yuv2plane1 = ff_yuv2plane1_16_sse4;
     }
 
     if (EXTERNAL_AVX(cpu_flags)) {
-        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
-                            HAVE_ALIGNED_STACK || ARCH_X86_64);
+        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, );
         if (!(c->opts.flags & SWS_ACCURATE_RND))
             ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx);
 
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]
