This is an automated email from the git hooks/post-receive script.
Git pushed a commit to branch master
in repository ffmpeg.
The following commit(s) were added to refs/heads/master by this push:
new c29465bcb6 swscale/x86/ops: use plain `ret` instruction
c29465bcb6 is described below
commit c29465bcb62122658ba785276cc3b8538344ca61
Author: Niklas Haas <[email protected]>
AuthorDate: Sat Apr 11 12:44:33 2026 +0200
Commit: Niklas Haas <[email protected]>
CommitDate: Sat Apr 11 16:30:15 2026 +0000
swscale/x86/ops: use plain `ret` instruction
The original intent here was probably to make the ops code agnostic to
which operation is actually last in the list, but the existence of a
divergence between CONTINUE and FINISH already implies that we hard-code
the assumption that the final operation is a write op.
So we can massively simplify this with a call/ret pair instead of
awkwardly exporting a return label and then jumping back to it. This actually
collapses FINISH down into just a plain RET, since the op kernels do not
set up any extra stack frame of their own.
Signed-off-by: Niklas Haas <[email protected]>
---
libswscale/x86/ops.c | 3 ---
libswscale/x86/ops_common.asm | 20 +-------------------
libswscale/x86/ops_int.asm | 38 +++++++++-----------------------------
3 files changed, 10 insertions(+), 51 deletions(-)
diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c
index ab113aa780..406d7c4c09 100644
--- a/libswscale/x86/ops.c
+++ b/libswscale/x86/ops.c
@@ -1006,9 +1006,6 @@ static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
#define ASSIGN_PROCESS_FUNC(NAME) \
do { \
SWS_DECL_FUNC(NAME); \
- void NAME##_return(void); \
- ret = ff_sws_op_chain_append(chain, NAME##_return, \
- NULL, &(SwsOpPriv) {0}); \
out->func = NAME; \
} while (0)
diff --git a/libswscale/x86/ops_common.asm b/libswscale/x86/ops_common.asm
index 0961d74a51..c48e17d3cc 100644
--- a/libswscale/x86/ops_common.asm
+++ b/libswscale/x86/ops_common.asm
@@ -26,12 +26,7 @@
; function is responsible for the block loop, as well as initializing the
; plane pointers. It will jump directly into the first operation kernel,
; and each operation kernel will jump directly into the next one, with the
-; final kernel jumping back into the sws_process return point. (See label
-; `sws_process.return` in ops_int.asm)
-;
-; To handle the jump back to the return point, we append an extra address
-; corresponding to the correct sws_process.return label into the SwsOpChain,
-; and have the WRITE kernel jump into it as usual. (See the FINISH macro)
+; final kernel returning back into the entry point.
;
; Inside an operation chain, we use a custom calling convention to preserve
; registers between kernels. The exact register allocation is found further
@@ -291,19 +286,6 @@ endstruc
CONTINUE tmp0q
%endmacro
-; Final macro to end the operation chain, used by WRITE kernels to jump back
-; to the process function return point. Very similar to CONTINUE, but skips
-; incrementing the implq pointer, and also clears AVX registers to avoid
-; phantom dependencies between loop iterations.
-%macro FINISH 1 ; reg
- %if vzeroupper_required
- ; we may jump back into an SSE read, so always zero upper regs here
- vzeroupper
- %endif
- jmp %1
- annotate_function_size
-%endmacro
-
; Helper for inline conditionals; used to conditionally include single lines
%macro IF 2+ ; cond, body
%if %1
diff --git a/libswscale/x86/ops_int.asm b/libswscale/x86/ops_int.asm
index 657a40df65..f28c8c640d 100644
--- a/libswscale/x86/ops_int.asm
+++ b/libswscale/x86/ops_int.asm
@@ -93,27 +93,12 @@ IF %1 > 3, mov in3q, [execq + SwsOpExec.in3]
IF %1 > 1, mov out1q, [execq + SwsOpExec.out1]
IF %1 > 2, mov out2q, [execq + SwsOpExec.out2]
IF %1 > 3, mov out3q, [execq + SwsOpExec.out3]
- jmp [rsp] ; call into op chain
-
-; Declare a separate global label for the return point, so that we can append
-; it to the list of op function pointers from the C code, effectively ensuring
-; that we end up here again after the op chain finishes processing a line.
-; (See also: cglobal_label in x86inc.asm)
-%if FORMAT_ELF
- global current_function %+ _return:function hidden
-%elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN
- global current_function %+ _return:private_extern
-%else
- global current_function %+ _return
-%endif
-align function_align
-current_function %+ _return:
-
- ; op chain always returns back here
+.loop:
+ call [rsp] ; call into op chain
mov implq, [rsp + 8]
inc bxd
cmp bxd, [rsp + 20]
- jne .continue
+ jne .loop
; end of line
inc yd
cmp yd, [rsp + 24]
@@ -131,7 +116,7 @@ IF %1 > 3, add out3q, [execq + SwsOpExec.out_bump3]
; conditionally apply y bump (if non-NULL)
mov tmp0q, [execq + SwsOpExec.in_bump_y]
test tmp0q, tmp0q
- jz .continue
+ jz .loop
movsxd tmp0q, [tmp0q + yq * 4 - 4] ; load (signed) y bump
%if %1 > 3
mov tmp1q, tmp0q
@@ -150,8 +135,7 @@ IF %1 > 3, add out3q, [execq + SwsOpExec.out_bump3]
%endif
imul tmp0q, [execq + SwsOpExec.in_stride0]
add in0q, tmp0q
-.continue:
- jmp [rsp]
+ jmp .loop
.end:
add rsp, 32
RET
@@ -271,7 +255,6 @@ IF %1 > 3, add in3q, mmsize * (1 + V2)
%macro write_planar 1 ; elems
op write_planar%1
- LOAD_CONT tmp0q
movu [out0q], mx
IF %1 > 1, movu [out1q], my
IF %1 > 2, movu [out2q], mz
@@ -286,7 +269,7 @@ IF %1 > 3, movu [out3q + mmsize], mw2
IF %1 > 1, add out1q, mmsize * (1 + V2)
IF %1 > 2, add out2q, mmsize * (1 + V2)
IF %1 > 3, add out3q, mmsize * (1 + V2)
- FINISH tmp0q
+ RET
%endmacro
%macro read_packed2 1 ; depth
@@ -325,7 +308,6 @@ IF %1 < 32, VBROADCASTI128 m12, [read%1_unpack2]
%macro write_packed2 1 ; depth
op write%1_packed2
IF %1 < 32, VBROADCASTI128 m12, [write%1_pack2]
- LOAD_CONT tmp0q
%if cpuflag(avx2)
vpermq mx, mx, q3120 ; { X0 X2 | X1 X3 }
vpermq my, my, q3120 ; { Y0 Y2 | Y1 Y3 }
@@ -352,7 +334,7 @@ IF %1 < 32, VBROADCASTI128 m12, [write%1_pack2]
IF V2, movu [out0q + 2*mmsize], m10
IF V2, movu [out0q + 3*mmsize], m11
add out0q, mmsize * (2 + V2 * 2)
- FINISH tmp0q
+ RET
%endmacro
; helper macro reused for both 3 and 4 component packed reads
@@ -433,11 +415,10 @@ IF1 V2, read_packed_inner mx2, my2, mz2, mw2, in0q + %1 * mmsize, %1, %2
%macro write_packed 2 ; num, depth
op write%2_packed%1
IF %2 < 32, VBROADCASTI128 m12, [write%2_pack%1]
- LOAD_CONT tmp0q
write_packed_inner mx, my, mz, mw, out0q, %1, %2
IF1 V2, write_packed_inner mx2, my2, mz2, mw2, out0q + %1 * mmsize, %1, %2
add out0q, %1 * mmsize * (1 + V2)
- FINISH tmp0q
+ RET
%endmacro
%macro rw_packed 1 ; depth
@@ -512,9 +493,8 @@ IF V2, pshufb mx2, m8
IF V2, pmovmskb tmp1d, mx2
mov [out0q], tmp0d
IF V2, mov [out0q + (mmsize >> 3)], tmp1d
- LOAD_CONT tmp0q
add out0q, (mmsize >> 3) * (1 + V2)
- FINISH tmp0q
+ RET
%endmacro
;--------------------------
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]