This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 83694749ad0c0510907c961422b525dbd2fb731f
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sun Feb 22 19:19:14 2026 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Fri Mar 6 20:02:41 2026 +0100

    avcodec/x86/vvc/of,dsp_init: Avoid unnecessary wrappers
    
    Write them in assembly instead; this exchanges a call+ret
    with a jmp and also avoids the stack for (1<<bpp)-1.
    
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vvc/dsp_init.c | 30 +++++++++---------------------
 libavcodec/x86/vvc/of.asm     | 28 ++++++++++++++++++++--------
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/libavcodec/x86/vvc/dsp_init.c b/libavcodec/x86/vvc/dsp_init.c
index 37ddbcb73b..158308fb33 100644
--- a/libavcodec/x86/vvc/dsp_init.c
+++ b/libavcodec/x86/vvc/dsp_init.c
@@ -50,24 +50,12 @@ DMVR_PROTOTYPES( 8, avx2)
 DMVR_PROTOTYPES(10, avx2)
 DMVR_PROTOTYPES(12, avx2)
 
-#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
-void ff_vvc_apply_bdof_avx2(uint8_t *dst, ptrdiff_t dst_stride,
-                            const int16_t *src0, const int16_t *src1,
-                            int w, int h, int pixel_max);
-
-#define OF_FUNC(bd, opt)                                                                            \
-static void vvc_apply_bdof_##bd##_##opt(uint8_t *dst, ptrdiff_t dst_stride,                         \
-    const int16_t *src0, const int16_t *src1, int w, int h)                                         \
-{                                                                                                   \
-    ff_vvc_apply_bdof##_##opt(dst, dst_stride, src0, src1, w, h, (1 << bd)  - 1);                   \
-}                                                                                                   \
-
-OF_FUNC( 8, avx2)
-OF_FUNC(10, avx2)
-OF_FUNC(12, avx2)
-
-#define OF_INIT(bd) c->inter.apply_bdof = vvc_apply_bdof_##bd##_avx2
-#endif
+#define OF_INIT(BD, OPT) do {                                                      \
+void ff_vvc_apply_bdof_## BD ## _ ## OPT(uint8_t *dst, ptrdiff_t dst_stride,       \
+                                         const int16_t *src0, const int16_t *src1, \
+                                         int w, int h);                            \
+    c->inter.apply_bdof = ff_vvc_apply_bdof_## BD ##_## OPT;                       \
+} while (0)
 
 #define ALF_BPC_PROTOTYPES(bpc, opt)                                                                                     \
 void BF(ff_vvc_alf_filter_luma, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride,                                            \
@@ -340,7 +328,7 @@ av_cold void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
             AVG_INIT(8, avx2);
             DMVR_INIT(8);
             MC_LINKS_AVX2(8);
-            OF_INIT(8);
+            OF_INIT(8, avx2);
             SAD_INIT();
 
             // filter
@@ -362,7 +350,7 @@ av_cold void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
             DMVR_INIT(10);
             MC_LINKS_AVX2(10);
             MC_LINKS_16BPC_AVX2(10);
-            OF_INIT(10);
+            OF_INIT(10, avx2);
             SAD_INIT();
 
             // filter
@@ -384,7 +372,7 @@ av_cold void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
             DMVR_INIT(12);
             MC_LINKS_AVX2(12);
             MC_LINKS_16BPC_AVX2(12);
-            OF_INIT(12);
+            OF_INIT(12, avx2);
             SAD_INIT();
 
             // filter
diff --git a/libavcodec/x86/vvc/of.asm b/libavcodec/x86/vvc/of.asm
index 895535c754..5184144739 100644
--- a/libavcodec/x86/vvc/of.asm
+++ b/libavcodec/x86/vvc/of.asm
@@ -345,11 +345,27 @@ INIT_YMM avx2
 %endif
 %endmacro
 
-;void ff_vvc_apply_bdof_%1(uint8_t *dst, const ptrdiff_t dst_stride, int16_t *src0, int16_t *src1,
-;    const int w, const int h, const int int pixel_max)
-%macro BDOF_AVX2 0
-cglobal vvc_apply_bdof, 7, 9, 16, BDOF_STACK_SIZE*32, dst, ds, src0, src1, w, h, pixel_max, ds3, tmp0
+%macro BDOF_WRAPPER 2 ; bpp, is_nonadjacent
+;void ff_vvc_apply_bdof_%1(uint8_t *dst, const ptrdiff_t dst_stride, const int16_t *src0,
+;                          const int16_t *src1, const int w, const int h)
+cglobal vvc_apply_bdof_%1
+    ; r6 is not used for parameter passing and is volatile both on UNIX64
+    ; and Win64, so it can be freely used
+    mov                    r6d, (1<<%1)-1
+%if %2
+    jmp        vvc_apply_bdof_ %+ cpuname
+%endif
+%endmacro
 
+%macro VVC_OF_AVX2 0
+    BDOF_WRAPPER 12, 1
+    BDOF_WRAPPER  8, 1
+    BDOF_WRAPPER 10, 0
+
+vvc_apply_bdof_ %+ cpuname:
+; the prologue on Win64 is big (10 xmm regs need saving), so use PROLOGUE
+; to avoid duplicating it.
+PROLOGUE 6, 9, 16, BDOF_STACK_SIZE*32, dst, ds, src0, src1, w, h, pixel_max, ds3, tmp0
     lea                   ds3q, [dsq * 3]
     sub                  src0q, SRC_STRIDE + SRC_PS
     sub                  src1q, SRC_STRIDE + SRC_PS
@@ -370,10 +386,6 @@ cglobal vvc_apply_bdof, 7, 9, 16, BDOF_STACK_SIZE*32, dst, ds, src0, src1, w, h,
     RET
 %endmacro
 
-%macro VVC_OF_AVX2 0
-    BDOF_AVX2
-%endmacro
-
 VVC_OF_AVX2
 
 %endif ; HAVE_AVX2_EXTERNAL

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to