This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch release/8.1 in repository ffmpeg.
commit b8ae2fb7140001fdabcaea298d199d549b52e2ce
Author:     Michael Niedermayer <[email protected]>
AuthorDate: Mon Jun 15 19:00:26 2026 +0200
Commit:     Michael Niedermayer <[email protected]>
CommitDate: Wed Jun 17 04:08:30 2026 +0200

    swscale/x86/rgb_2_rgb: fix uyvytoyuv422 overwrite on odd width

    uyvytoyuv422 converts packed UYVY, whose macroblocks are pixel pairs, and the SIMD code only handled even widths. On an odd width the trailing half macroblock made the kernel write past the end of the Y/U/V destinations: the AVX512ICL masked tail dropped the odd pixel and the fall-through re-entered the SIMD loop, writing a full mmsize*2 chunk past the planes (127 bytes of Y, 63 of U and 63 of V); the sse2/avx/avx2 scalar tail wrote one byte past the Y plane.

    Process only whole pairs and emit the trailing odd column from a small per-row epilogue that matches uyvytoyuv422_c (ydst[w-1] = src[2w-1], udst[cw-1] = src[2w-2], vdst[cw-1] = src[2w]). All four SIMD variants are now bit-exact with the C reference for even and odd widths and no longer overwrite the destination.

    Verified on AVX512ICL hardware (Ryzen 9 9950X) with checkasm.

    Found-by: Claude (Anthropic). Human-verified and reported by Omkhar Arasaratnam <[email protected]>.
Signed-off-by: Michael Niedermayer <[email protected]> (cherry picked from commit 21782b7b3143a3ed68de635c83b2094523e4cf39) Signed-off-by: Michael Niedermayer <[email protected]> --- libswscale/x86/rgb_2_rgb.asm | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/libswscale/x86/rgb_2_rgb.asm b/libswscale/x86/rgb_2_rgb.asm index 871bb21127..120cc95c4a 100644 --- a/libswscale/x86/rgb_2_rgb.asm +++ b/libswscale/x86/rgb_2_rgb.asm @@ -193,6 +193,7 @@ cglobal uyvytoyuv422, 9, 14, 8 + cpuflag(avx2) + cpuflag(avx512icl), ydst, udst, movsxdifnidn src_strideq, src_strided mov back_wq, wq + and wq, -2 ; process whole UYVY pairs; trailing odd column via epilogue mov whalfq, wq shr whalfq, 1 ; whalf = width / 2 @@ -212,7 +213,7 @@ cglobal uyvytoyuv422, 9, 14, 8 + cpuflag(avx2) + cpuflag(avx512icl), ydst, udst, ;calc scalar loop count and xq, mmsize * 2 - 1 - je .loop_simd + je .skip_tail %if mmsize == 64 shr xq, 1 @@ -292,6 +293,7 @@ cglobal uyvytoyuv422, 9, 14, 8 + cpuflag(avx2) + cpuflag(avx512icl), ydst, udst, %endif ; check if simd loop is need +.skip_tail: cmp wq, 0 jge .end_line @@ -378,6 +380,15 @@ cglobal uyvytoyuv422, 9, 14, 8 + cpuflag(avx2) + cpuflag(avx512icl), ydst, udst, jl .loop_simd .end_line: + test back_wq, 1 + jz .skip_last + mov tmpb, [srcq + 1] + mov [ydstq], tmpb + mov tmpb, [srcq + 0] + mov [udstq], tmpb + mov tmpb, [srcq + 2] + mov [vdstq], tmpb + .skip_last: add srcq, src_strideq add ydstq, lum_strideq add udstq, chrom_strideq @@ -385,6 +396,7 @@ cglobal uyvytoyuv422, 9, 14, 8 + cpuflag(avx2) + cpuflag(avx512icl), ydst, udst, ;restore initial state of line variable mov wq, back_wq + and wq, -2 mov xq, wq mov whalfq, wq shr whalfq, 1 ; whalf = width / 2 _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
