This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 9cb5280c0e4c0f1e832276d160055d3f9a71b17e Author: Andreas Rheinhardt <[email protected]> AuthorDate: Sat Feb 28 19:21:51 2026 +0100 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Fri Mar 6 20:02:41 2026 +0100 avcodec/x86/vvc/alf: Improve storing 8bpp When width is known to be 8 (i.e. for luma that is not width 16), the upper lane is unused, so use an xmm-sized packuswb and avoid the vpermq altogether. For chroma whose width is not known to be 16 (i.e. 4, 8 or 12), defer extracting from the high lane until it is known to be needed. Also do so via vextracti128 instead of vpermq (also do this for bpp>8). Also use vextracti128 and an xmm-sized packuswb in case of width 16 instead of a ymm-sized packuswb followed by vextracti128. Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/x86/vvc/alf.asm | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/libavcodec/x86/vvc/alf.asm b/libavcodec/x86/vvc/alf.asm index ed83134cd4..8798d7b3c9 100644 --- a/libavcodec/x86/vvc/alf.asm +++ b/libavcodec/x86/vvc/alf.asm @@ -354,11 +354,7 @@ SECTION .text jl .w4 STORE_PIXELS_W8 %1, %2 je .end - %if ps == 2 - vpermq m%2, m%2, q0302 - %else - vpermq m%2, m%2, q0101 - %endif + vextracti128 xm%2, m%2, 1 STORE_PIXELS_W4 %1, %2, 8 jmp .end .w4: @@ -366,19 +362,24 @@ SECTION .text .end: %endmacro -; STORE_PIXELS(dst, src, width) -%macro STORE_PIXELS 3 - %if ps == 1 - packuswb m%2, m%2 - vpermq m%2, m%2, 0x8 - %endif - +; STORE_PIXELS(dst, src, width, tmp reg) +%macro STORE_PIXELS 4 %ifidn %3, 16 + %if ps == 1 + vextracti128 xm%4, m%2, 1 + packuswb xm%2, xm%4 + %endif STORE_PIXELS_W16 %1, %2 %else %if LUMA + %if ps == 1 + packuswb xm%2, xm%2 + %endif STORE_PIXELS_W8 %1, %2 %else + %if ps == 1 + packuswb m%2, m%2 + %endif STORE_PIXELS_W8LE %1, %2, %3 %endif %endif @@ -413,7 +414,7 @@ SECTION .text CLIPW m0, m14, m15 %endif - STORE_PIXELS dstq, 0, %1 + STORE_PIXELS dstq, 0, %1, 2 lea srcq, [srcq + src_strideq] lea dstq, [dstq + 
dst_strideq] _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
