ffmpeg | branch: master | James Almer <jamr...@gmail.com> | Wed Jun  5 13:41:32 2024 -0300| [c578bb9864de0e9d63dbd2e334a624ad1b99eaf2] | committer: James Almer

swscale/x86/input: add AVX2 optimized uyvytoyuv422

uyvytoyuv422_c: 23991.8
uyvytoyuv422_sse2: 2817.8
uyvytoyuv422_avx: 2819.3
uyvytoyuv422_avx2: 1972.3

Signed-off-by: James Almer <jamr...@gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c578bb9864de0e9d63dbd2e334a624ad1b99eaf2
---

 libswscale/x86/rgb2rgb.c     |  6 ++++++
 libswscale/x86/rgb_2_rgb.asm | 32 ++++++++++++++++++++++++--------
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index 2bfab2cf16..1dc8f1549c 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -2363,6 +2363,9 @@ void ff_uyvytoyuv422_sse2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 void ff_uyvytoyuv422_avx(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                          const uint8_t *src, int width, int height,
                          int lumStride, int chromStride, int srcStride);
+void ff_uyvytoyuv422_avx2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                          const uint8_t *src, int width, int height,
+                          int lumStride, int chromStride, int srcStride);
 #endif
 
 #define DEINTERLEAVE_BYTES(cpuext)                                            \
@@ -2435,5 +2438,8 @@ av_cold void rgb2rgb_init_x86(void)
         shuffle_bytes_3210 = ff_shuffle_bytes_3210_avx2;
 #endif
     }
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        uyvytoyuv422 = ff_uyvytoyuv422_avx2;
+    }
 #endif
 }
diff --git a/libswscale/x86/rgb_2_rgb.asm b/libswscale/x86/rgb_2_rgb.asm
index 76ca1eec03..0bf1278718 100644
--- a/libswscale/x86/rgb_2_rgb.asm
+++ b/libswscale/x86/rgb_2_rgb.asm
@@ -34,13 +34,16 @@ pb_shuffle3210: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
 
 SECTION .text
 
-%macro RSHIFT_COPY 3
+%macro RSHIFT_COPY 5
 ; %1 dst ; %2 src ; %3 second vperm2i128 source (mmsize == 32 only) ; %4 shift ; %5 vperm2i128 immediate (mmsize == 32 only)
-%if cpuflag(avx)
-    psrldq  %1, %2, %3
+%if mmsize == 32
+    vperm2i128 %1, %2, %3, %5
+    RSHIFT         %1, %4
+%elif cpuflag(avx)
+    psrldq  %1, %2, %4
 %else
     mova           %1, %2
-    RSHIFT         %1, %3
+    RSHIFT         %1, %4
 %endif
 %endmacro
 
@@ -233,26 +236,37 @@ cglobal uyvytoyuv422, 9, 14, 8, ydst, udst, vdst, src, w, h, lum_stride, chrom_s
     jge .end_line
 
     .loop_simd:
+%if mmsize == 32
+        movu   xm2, [srcq + wtwoq         ]
+        movu   xm3, [srcq + wtwoq + 16    ]
+        movu   xm4, [srcq + wtwoq + 16 * 2]
+        movu   xm5, [srcq + wtwoq + 16 * 3]
+        vinserti128 m2, m2, [srcq + wtwoq + 16 * 4], 1
+        vinserti128 m3, m3, [srcq + wtwoq + 16 * 5], 1
+        vinserti128 m4, m4, [srcq + wtwoq + 16 * 6], 1
+        vinserti128 m5, m5, [srcq + wtwoq + 16 * 7], 1
+%else
         movu    m2, [srcq + wtwoq             ]
         movu    m3, [srcq + wtwoq + mmsize    ]
         movu    m4, [srcq + wtwoq + mmsize * 2]
         movu    m5, [srcq + wtwoq + mmsize * 3]
+%endif
 
         ; extract y part 1
-        RSHIFT_COPY    m6, m2, 1 ; UYVY UYVY -> YVYU YVY...
+        RSHIFT_COPY    m6, m2, m4, 1, 0x20 ; UYVY UYVY -> YVYU YVY...
         pand           m6, m1; YxYx YxYx...
 
-        RSHIFT_COPY    m7, m3, 1 ; UYVY UYVY -> YVYU YVY...
+        RSHIFT_COPY    m7, m3, m5, 1, 0x20 ; UYVY UYVY -> YVYU YVY...
         pand           m7, m1 ; YxYx YxYx...
 
         packuswb       m6, m7 ; YYYY YYYY...
         movu [ydstq + wq], m6
 
         ; extract y part 2
-        RSHIFT_COPY    m6, m4, 1 ; UYVY UYVY -> YVYU YVY...
+        RSHIFT_COPY    m6, m4, m2, 1, 0x13 ; UYVY UYVY -> YVYU YVY...
         pand           m6, m1; YxYx YxYx...
 
-        RSHIFT_COPY    m7, m5, 1 ; UYVY UYVY -> YVYU YVY...
+        RSHIFT_COPY    m7, m5, m3, 1, 0x13 ; UYVY UYVY -> YVYU YVY...
         pand           m7, m1 ; YxYx YxYx...
 
         packuswb                m6, m7 ; YYYY YYYY...
@@ -309,4 +323,6 @@ UYVY_TO_YUV422
 
 INIT_XMM avx
 UYVY_TO_YUV422
+INIT_YMM avx2
+UYVY_TO_YUV422
 %endif

_______________________________________________
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to