avcodec/x86/vvc/of: Avoid jump

Andreas Rheinhardt via ffmpeg-cvslog Fri, 06 Mar 2026 11:37:39 -0800

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit 2159e40ab31856613266d69d7d2ba579f0df407d
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sat Feb 28 16:25:32 2026 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Fri Mar 6 20:02:41 2026 +0100

    avcodec/x86/vvc/of: Avoid jump
    
    At the end of the height==8 codepath, a jump to RET at the end
    of the height==16 codepath is performed. Yet the epilogue
    is so cheap on Unix64 that this jump is not worthwhile.
    On Win64, meanwhile, one can still avoid jumps: for width 16
    with >8bpp content and for width 8 with 8bpp content, a jump is
    performed to the end of the height==8 codepath, immediately
    followed by a jump to RET. These two jumps can be combined into one.
    
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vvc/of.asm | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/libavcodec/x86/vvc/of.asm b/libavcodec/x86/vvc/of.asm
index 14a9ae6898..eca52f244f 100644
--- a/libavcodec/x86/vvc/of.asm
+++ b/libavcodec/x86/vvc/of.asm
@@ -71,7 +71,7 @@ INIT_YMM avx2
     pmulhrsw                      %1, m11
 %endmacro
 
-%macro SAVE 2 ; dst, src
+%macro SAVE 2-3 ""; dst, src, jump target
     cmp                 pixel_maxd, (1 << 8) - 1
     jne               %%save_16bpc
 
@@ -80,14 +80,22 @@ INIT_YMM avx2
     cmp                         wd, 16
     je                       %%w16_8
     movq                        %1, xm%2
+%ifnidn %3, ""
+    jmp                         %3
+%else
     jmp                      %%end
+%endif
 
 %%save_16bpc:
     CLIPW                      m%2, m9, m10
     cmp                         wd, 16
     jne                       %%w8_16
     movu                        %1, m%2
+%ifnidn %3, ""
+    jmp                         %3
+%else
     jmp                      %%end
+%endif
 
 %%w16_8:
     vpermq                     m%2, m%2, q0020
@@ -98,7 +106,7 @@ INIT_YMM avx2
 
 ; [rsp + even * mmsize] are gradient_h[0] - gradient_h[1]
 ; [rsp +  odd * mmsize] are gradient_v[0] - gradient_v[1]
-%macro APPLY_BDOF_MIN_BLOCK 4 ; block_num, vx, vy, bd
+%macro APPLY_BDOF_MIN_BLOCK 3-4 ""; block_num, vx, vy, jump target
     pxor                          m9, m9
 
     movd                        xm10, pixel_maxd
@@ -118,7 +126,7 @@ INIT_YMM avx2
     SAVE                        [dstq + 2 * dsq], 6
 
     APPLY_BDOF_MIN_BLOCK_LINE    m6, %2, %3, m7, (%1) * 4 + 3
-    SAVE                        [dstq + ds3q], 6
+    SAVE                        [dstq + ds3q], 6, %4
 %endmacro
 
 %macro SUM_MIN_BLOCK_W16 4 ; src/dst, shuffle, perm, tmp
@@ -327,7 +335,12 @@ INIT_YMM avx2
 %if (%2)
     BDOF_PROF_GRAD  %1 * 4 + 3, %2
     BDOF_VX_VY              12, 13
-    APPLY_BDOF_MIN_BLOCK    %1, m12, m13, bd
+%if UNIX64
+    APPLY_BDOF_MIN_BLOCK    %1, m12, m13
+%else
+    APPLY_BDOF_MIN_BLOCK    %1, m12, m13, .end
+%endif
+
 %else
     mova                   m14, m12
     mova                   m15, m13
@@ -340,7 +353,7 @@ INIT_YMM avx2
     paddw                  m15, m13
 
     BDOF_VX_VY              14, 15
-    APPLY_BDOF_MIN_BLOCK    %1, m14, m15, bd
+    APPLY_BDOF_MIN_BLOCK    %1, m14, m15
     lea                   dstq, [dstq + 4 * dsq]
 %endif
 %endmacro
@@ -375,7 +388,11 @@ PROLOGUE 6, 9, 16, BDOF_STACK_SIZE*32, dst, ds, src0, src1, w, h, pixel_max, ds3
     cmp                     hd, 16
     je                    .h16
     BDOF_MINI_BLOCKS         1, 1
+%if UNIX64
+    RET
+%else
     jmp                   .end
+%endif
 
 .h16:
     BDOF_MINI_BLOCKS         1, 0

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 09/28: avcodec/x86/vvc/of: Avoid jump

Reply via email to