rgb_2_rgb: fix uyvytoyuv422 overwrite on odd width

Michael Niedermayer via ffmpeg-cvslog Tue, 16 Jun 2026 19:17:46 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch release/8.1
in repository ffmpeg.


commit b8ae2fb7140001fdabcaea298d199d549b52e2ce
Author:     Michael Niedermayer <[email protected]>
AuthorDate: Mon Jun 15 19:00:26 2026 +0200
Commit:     Michael Niedermayer <[email protected]>
CommitDate: Wed Jun 17 04:08:30 2026 +0200

    swscale/x86/rgb_2_rgb: fix uyvytoyuv422 overwrite on odd width
    
    uyvytoyuv422 converts packed UYVY, whose macroblocks are pixel pairs, and
    the SIMD code only handled even widths. On an odd width the trailing half
    macroblock made the kernel write past the end of the Y/U/V destinations:
    the AVX512ICL masked tail dropped the odd pixel and the fall-through
    re-entered the SIMD loop, writing a full mmsize*2 chunk past the planes
    (127 bytes of Y, 63 of U and 63 of V); the sse2/avx/avx2 scalar tail
    wrote one byte past the Y plane.
    
    Process only whole pairs and emit the trailing odd column from a small
    per-row epilogue that matches uyvytoyuv422_c (ydst[w-1] = src[2w-1],
    udst[cw-1] = src[2w-2], vdst[cw-1] = src[2w]).
    
    All four SIMD variants are now bit-exact with the C reference for even and
    odd widths and no longer overwrite the destination. Verified on AVX512ICL
    hardware (Ryzen 9 9950X) with checkasm.
    
    Found-by: Claude (Anthropic). Human-verified and reported by Omkhar Arasaratnam <[email protected]>.
    Signed-off-by: Michael Niedermayer <[email protected]>
    (cherry picked from commit 21782b7b3143a3ed68de635c83b2094523e4cf39)
    Signed-off-by: Michael Niedermayer <[email protected]>
---
 libswscale/x86/rgb_2_rgb.asm | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/libswscale/x86/rgb_2_rgb.asm b/libswscale/x86/rgb_2_rgb.asm
index 871bb21127..120cc95c4a 100644
--- a/libswscale/x86/rgb_2_rgb.asm
+++ b/libswscale/x86/rgb_2_rgb.asm
@@ -193,6 +193,7 @@ cglobal uyvytoyuv422, 9, 14, 8 + cpuflag(avx2) + cpuflag(avx512icl), ydst, udst,
     movsxdifnidn   src_strideq, src_strided
 
     mov     back_wq, wq
+    and          wq, -2     ; process whole UYVY pairs; trailing odd column via epilogue
     mov      whalfq, wq
     shr      whalfq, 1     ; whalf = width / 2
 
@@ -212,7 +213,7 @@ cglobal uyvytoyuv422, 9, 14, 8 + cpuflag(avx2) + cpuflag(avx512icl), ydst, udst,
 
     ;calc scalar loop count
     and       xq, mmsize * 2 - 1
-    je .loop_simd
+    je .skip_tail
 
 %if mmsize == 64
     shr     xq, 1
@@ -292,6 +293,7 @@ cglobal uyvytoyuv422, 9, 14, 8 + cpuflag(avx2) + cpuflag(avx512icl), ydst, udst,
 %endif
 
     ; check if simd loop is need
+.skip_tail:
     cmp      wq, 0
     jge .end_line
 
@@ -378,6 +380,15 @@ cglobal uyvytoyuv422, 9, 14, 8 + cpuflag(avx2) + cpuflag(avx512icl), ydst, udst,
         jl .loop_simd
 
     .end_line:
+        test    back_wq, 1
+        jz .skip_last
+        mov       tmpb, [srcq + 1]
+        mov     [ydstq], tmpb
+        mov       tmpb, [srcq + 0]
+        mov     [udstq], tmpb
+        mov       tmpb, [srcq + 2]
+        mov     [vdstq], tmpb
+    .skip_last:
         add        srcq, src_strideq
         add        ydstq, lum_strideq
         add        udstq, chrom_strideq
@@ -385,6 +396,7 @@ cglobal uyvytoyuv422, 9, 14, 8 + cpuflag(avx2) + cpuflag(avx512icl), ydst, udst,
 
         ;restore initial state of line variable
         mov           wq, back_wq
+        and           wq, -2
         mov          xq, wq
         mov      whalfq, wq
         shr      whalfq, 1     ; whalf = width / 2

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 04/05: swscale/x86/rgb_2_rgb: fix uyvytoyuv422 overwrite on odd width

Reply via email to