PR #23449 opened by Kacper Michajłow (kasper93) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23449 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23449.patch
From a3f2ae10069cbd09e6727e62555fc7c0dc72819c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]> Date: Thu, 11 Jun 2026 03:16:13 +0200 Subject: [PATCH 1/9] avutil/x86/x86inc: add ENDBR define Maps to endbr64/endbr32 depending on target arch, disabled on pre-i686. --- libavutil/x86/x86inc.asm | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 0e80ebed43..edd7d18f52 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -87,6 +87,29 @@ %define FORCE_VEX_ENCODING 0 %endif +; Mark a valid target for indirect branches when Indirect Branch Tracking is +; enabled. Except on pre-i686 where endbr32 is not a valid instruction encoding. +%if ARCH_X86_64 || HAVE_I686 + %ifdef __NASM_VERSION_ID__ + %if __NASM_VERSION_ID__ >= 0x020e0000 ; 2.14 + %if ARCH_X86_64 + %define ENDBR endbr64 + %else + %define ENDBR endbr32 + %endif + %endif + %endif + %ifndef ENDBR + %if ARCH_X86_64 + %define ENDBR db 0xf3, 0x0f, 0x1e, 0xfa ; endbr64 + %else + %define ENDBR db 0xf3, 0x0f, 0x1e, 0xfb ; endbr32 + %endif + %endif +%else + %define ENDBR +%endif + ; aout does not support align= ; NOTE: This section is out of sync with x264, in order to ; keep supporting OS/2. 
-- 2.52.0 From c99c41b30e7620554f6d5e59b0d968fe25cf3e8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]> Date: Thu, 11 Jun 2026 03:22:51 +0200 Subject: [PATCH 2/9] avutil/x86/x86inc: emit ENDBR at function entry points --- libavutil/x86/x86inc.asm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index edd7d18f52..5f63e1d364 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -872,6 +872,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, %endif align function_align %2: + ENDBR RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer %xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required %assign stack_offset 0 ; stack pointer offset relative to the return address @@ -893,6 +894,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, global current_function %+ %1 %endif %1: + ENDBR %endmacro %macro cextern 1 -- 2.52.0 From 13f577f509e9ece84e8351c2ac45260289014e9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]> Date: Thu, 11 Jun 2026 03:23:16 +0200 Subject: [PATCH 3/9] avutil/x86/x86inc: mark objects as IBT compatible --- libavutil/x86/x86inc.asm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 5f63e1d364..0bafbc8054 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -939,7 +939,8 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, db "GNU",0 ; n_name dd 0xc0000002 ; pr_type = GNU_PROPERTY_X86_FEATURE_1_AND dd 0x00000004 ; pr_datasz - dd 0x00000002 ; pr_data = GNU_PROPERTY_X86_FEATURE_1_SHSTK + dd 0x00000003 ; pr_data = GNU_PROPERTY_X86_FEATURE_1_IBT | + ; GNU_PROPERTY_X86_FEATURE_1_SHSTK dd 0x00000000 ; pr_padding %endif %endif -- 2.52.0 
From dafe014fc11e3a54e4f9903f42315768a8c9140d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]> Date: Thu, 11 Jun 2026 03:14:58 +0200 Subject: [PATCH 4/9] avutil/x86/asm: add ENDBR define Maps to endbr64/endbr32 depending on target arch, disabled on pre-i686. --- libavutil/x86/asm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libavutil/x86/asm.h b/libavutil/x86/asm.h index f06ea25035..b51c0e393a 100644 --- a/libavutil/x86/asm.h +++ b/libavutil/x86/asm.h @@ -98,6 +98,16 @@ typedef int x86_reg; # define XMM_CLOBBERS_ONLY(...) #endif +/* Mark a valid target for indirect branches when Indirect Branch Tracking + * is enabled. Except on pre-i686 where endbr32 is not a valid instruction encoding. */ +#if ARCH_X86_64 +# define ENDBR ".byte 0xf3, 0x0f, 0x1e, 0xfa\n\t" /* endbr64 */ +#elif HAVE_I686 +# define ENDBR ".byte 0xf3, 0x0f, 0x1e, 0xfb\n\t" /* endbr32 */ +#else +# define ENDBR +#endif + /* Use to export labels from asm. */ #define LABEL_MANGLE(a) EXTERN_PREFIX #a -- 2.52.0 From c0945e187411da861c99eb744e87b7301d2538ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]> Date: Thu, 11 Jun 2026 03:17:10 +0200 Subject: [PATCH 5/9] [TO BE UPSTREAMED] do not merge --- tests/checkasm/ext/src/x86/x86inc.asm | 31 ++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/tests/checkasm/ext/src/x86/x86inc.asm b/tests/checkasm/ext/src/x86/x86inc.asm index ece93f1452..957ea98f98 100644 --- a/tests/checkasm/ext/src/x86/x86inc.asm +++ b/tests/checkasm/ext/src/x86/x86inc.asm @@ -97,6 +97,32 @@ %define FORCE_VEX_ENCODING 0 %endif +; Mark a valid target for indirect branches when Indirect Branch Tracking is +; enabled. Except on pre-i686 where endbr32 is not a valid instruction encoding. 
+%ifndef HAVE_I686 + %define HAVE_I686 0 +%endif +%if ARCH_X86_64 || HAVE_I686 + %ifdef __NASM_VERSION_ID__ + %if __NASM_VERSION_ID__ >= 0x020e0000 ; 2.14 + %if ARCH_X86_64 + %define ENDBR endbr64 + %else + %define ENDBR endbr32 + %endif + %endif + %endif + %ifndef ENDBR + %if ARCH_X86_64 + %define ENDBR db 0xf3, 0x0f, 0x1e, 0xfa ; endbr64 + %else + %define ENDBR db 0xf3, 0x0f, 0x1e, 0xfb ; endbr32 + %endif + %endif +%else + %define ENDBR +%endif + %macro SECTION_RODATA 0-1 16 %ifidn __OUTPUT_FORMAT__,win32 SECTION .rdata align=%1 @@ -861,6 +887,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, %endif align function_align %2: + ENDBR RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer %xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required %assign stack_offset 0 ; stack pointer offset relative to the return address @@ -882,6 +909,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, global current_function %+ %1 %endif %1: + ENDBR %endmacro %macro cextern 1 @@ -926,7 +954,8 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, db "GNU",0 ; n_name dd 0xc0000002 ; pr_type = GNU_PROPERTY_X86_FEATURE_1_AND dd 0x00000004 ; pr_datasz - dd 0x00000002 ; pr_data = GNU_PROPERTY_X86_FEATURE_1_SHSTK + dd 0x00000003 ; pr_data = GNU_PROPERTY_X86_FEATURE_1_IBT | + ; GNU_PROPERTY_X86_FEATURE_1_SHSTK dd 0x00000000 ; pr_padding %endif %endif -- 2.52.0 From c16319a861dc8e2c87baab8fc6b97143703f55b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]> Date: Thu, 11 Jun 2026 03:18:31 +0200 Subject: [PATCH 6/9] avcodec/x86/mlpdsp_init: mark indirect branch targets with ENDBR --- libavcodec/x86/mlpdsp_init.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libavcodec/x86/mlpdsp_init.c b/libavcodec/x86/mlpdsp_init.c index 21a0e38143..5f95ef9423 100644 --- 
a/libavcodec/x86/mlpdsp_init.c +++ b/libavcodec/x86/mlpdsp_init.c @@ -76,6 +76,7 @@ static const void * const iirtable[5] = { &ff_mlp_iirorder_0, &ff_mlp_iirorder_1 #define MLPMUL(label, offset, offs, offc) \ LABEL_MANGLE(label)": \n\t" \ + ENDBR \ "movslq "offset"+"offs"(%0), %%rax\n\t" \ "movslq "offset"+"offc"(%1), %%rdx\n\t" \ "imul %%rdx, %%rax\n\t" \ @@ -83,6 +84,7 @@ static const void * const iirtable[5] = { &ff_mlp_iirorder_0, &ff_mlp_iirorder_1 #define FIRMULREG(label, offset, firc)\ LABEL_MANGLE(label)": \n\t" \ + ENDBR \ "movslq "#offset"(%0), %%rax\n\t" \ "imul %"#firc", %%rax\n\t" \ "add %%rax, %%rsi\n\t" @@ -101,6 +103,7 @@ static const void * const iirtable[5] = { &ff_mlp_iirorder_0, &ff_mlp_iirorder_1 #define MLPMUL(label, offset, offs, offc) \ LABEL_MANGLE(label)": \n\t" \ + ENDBR \ "mov "offset"+"offs"(%0), %%eax\n\t" \ "imull "offset"+"offc"(%1) \n\t" \ "add %%eax , %%esi\n\t" \ @@ -155,12 +158,14 @@ static void mlp_filter_channel_x86(int32_t *state, const int32_t *coeff, FIRMUL (ff_mlp_firorder_2, 0x04 ) FIRMULREG(ff_mlp_firorder_1, 0x00, 8) LABEL_MANGLE(ff_mlp_firorder_0)":\n\t" + ENDBR "jmp *%6 \n\t" IIRMUL (ff_mlp_iirorder_4, 0x0c ) IIRMUL (ff_mlp_iirorder_3, 0x08 ) IIRMUL (ff_mlp_iirorder_2, 0x04 ) IIRMUL (ff_mlp_iirorder_1, 0x00 ) LABEL_MANGLE(ff_mlp_iirorder_0)":\n\t" + ENDBR SHIFT_ACCUM "mov "RESULT" ,"ACCUM" \n\t" "add (%2) ,"RESULT" \n\t" -- 2.52.0 From 94499e71d1ddb833404cf26a45c0f6ad4aa3526b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]> Date: Thu, 11 Jun 2026 03:20:28 +0200 Subject: [PATCH 7/9] avcodec/x86/vp9itxfm: mark indirect branch targets with ENDBR --- libavcodec/x86/vp9itxfm_16bpp_avx512.asm | 2 ++ libavcodec/x86/vp9itxfm_avx2.asm | 6 ++++++ libavcodec/x86/vp9itxfm_avx512.asm | 2 ++ 3 files changed, 10 insertions(+) diff --git a/libavcodec/x86/vp9itxfm_16bpp_avx512.asm b/libavcodec/x86/vp9itxfm_16bpp_avx512.asm index 1924233469..4d5dd4a129 100644 --- 
a/libavcodec/x86/vp9itxfm_16bpp_avx512.asm +++ b/libavcodec/x86/vp9itxfm_16bpp_avx512.asm @@ -375,6 +375,7 @@ cglobal vp9_idct_16x16_internal_10, 0, 7, 22, dst, stride, c, eob, tx2 TRANSPOSE_4D 4, 5, 6, 7, 16 jmp tx2q .pass2: + ENDBR test eobd, eobd jl .pass2_fast call .main_part1 @@ -603,6 +604,7 @@ cglobal vp9_iadst_16x16_internal_10, 0, 7, 22, dst, stride, c, eob, tx2 WRAP_YMM IADST16_PASS1_END jmp m(vp9_idct_16x16_internal_10).pass1_fast_end .pass2: + ENDBR test eobd, eobd jl .pass2_fast call .main_part1 diff --git a/libavcodec/x86/vp9itxfm_avx2.asm b/libavcodec/x86/vp9itxfm_avx2.asm index c5ee2426e2..e632774713 100644 --- a/libavcodec/x86/vp9itxfm_avx2.asm +++ b/libavcodec/x86/vp9itxfm_avx2.asm @@ -336,6 +336,7 @@ cglobal vp9_idct_4x4_internal, 0, 5, 6, dst, stride, c, eob, tx2 pshufb m1, m3, m2 jmp tx2q .pass2: + ENDBR call .main .pass2_end: vpbroadcastd m2, [o(pw_2048)] @@ -382,6 +383,7 @@ cglobal vp9_iadst_4x4_internal, 0, 5, 6, dst, stride, c, eob, tx2 call .main jmp m(vp9_idct_4x4_internal).pass1_end .pass2: + ENDBR call .main jmp m(vp9_idct_4x4_internal).pass2_end ALIGN function_align @@ -481,6 +483,7 @@ cglobal vp9_idct_8x8_internal, 0, 5, 8, dst, stride, c, eob, tx2 vperm2i128 m3, m5, m3, 0x31 jmp tx2q .pass2: + ENDBR call .main vpbroadcastd m4, [o(pw_1024)] vpermq m1, m1, q2031 @@ -553,6 +556,7 @@ cglobal vp9_iadst_8x8_internal, 0, 5, 8, dst, stride, c, eob, tx2 vinserti128 m1, m4, xm1, 1 jmp tx2q .pass2: + ENDBR pshufd m4, m0, q1032 pshufd m5, m1, q1032 call .main @@ -923,6 +927,7 @@ cglobal vp9_idct_16x16_internal, 0, 5, 16, 32*6, dst, stride, c, eob, tx2 call .transpose_8x8 jmp tx2q .pass2: + ENDBR test eobd, eobd jl .pass2_fast call .main @@ -1039,6 +1044,7 @@ cglobal vp9_iadst_16x16_internal, 0, 5, 16, 32*6, dst, stride, c, eob, tx2 mova xm0, [rsp+32*0] jmp m(vp9_idct_16x16_internal).pass1_fast_end .pass2: + ENDBR test eobd, eobd jl .pass2_fast call .main diff --git a/libavcodec/x86/vp9itxfm_avx512.asm b/libavcodec/x86/vp9itxfm_avx512.asm index 
d51c50756d..788a63f222 100644 --- a/libavcodec/x86/vp9itxfm_avx512.asm +++ b/libavcodec/x86/vp9itxfm_avx512.asm @@ -524,6 +524,7 @@ cglobal vp9_idct_16x16_internal, 0, 5, 16, dst, stride, c, eob, tx2 punpckldq m0, m4 ; 0-1 jmp tx2q .pass2: + ENDBR test eobd, eobd jl .pass2_fast call .main @@ -771,6 +772,7 @@ cglobal vp9_iadst_16x16_internal, 0, 5, 16, dst, stride, c, eob, tx2 vpermt2q m3, m5, m4 jmp tx2q .pass2: + ENDBR pshufd m1, m1, q1032 pshufd m3, m3, q1032 test eobd, eobd -- 2.52.0 From f75f92d4cf88e7335574e28378a00b39b7f21db3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]> Date: Thu, 11 Jun 2026 03:20:48 +0200 Subject: [PATCH 8/9] avcodec/x86/vvc/mc: mark indirect branch targets with ENDBR --- libavcodec/x86/vvc/mc.asm | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libavcodec/x86/vvc/mc.asm b/libavcodec/x86/vvc/mc.asm index 4f078ea8d0..2ccd20d45f 100644 --- a/libavcodec/x86/vvc/mc.asm +++ b/libavcodec/x86/vvc/mc.asm @@ -76,6 +76,7 @@ SECTION .text %if %3 INIT_XMM cpuname .w2: + ENDBR movd xm0, [src0q] pinsrd xm0, [src0q + AVG_SRC_STRIDE], 1 movd xm1, [src1q] @@ -85,6 +86,7 @@ INIT_XMM cpuname AVG_LOOP_END .w2 .w4: + ENDBR movq xm0, [src0q] pinsrq xm0, [src0q + AVG_SRC_STRIDE], 1 movq xm1, [src1q] @@ -96,6 +98,7 @@ INIT_XMM cpuname INIT_YMM cpuname .w8: + ENDBR movu xm0, [src0q] movu xm1, [src1q] vinserti128 m0, m0, [src0q + AVG_SRC_STRIDE], 1 @@ -106,21 +109,25 @@ INIT_YMM cpuname AVG_LOOP_END .w8 .w16: + ENDBR AVG_W16_FN %1, %2, 1 AVG_LOOP_END .w16 .w32: + ENDBR AVG_W16_FN %1, %2, 2 AVG_LOOP_END .w32 .w64: + ENDBR AVG_W16_FN %1, %2, 4 AVG_LOOP_END .w64 .w128: + ENDBR AVG_W16_FN %1, %2, 8 AVG_LOOP_END .w128 -- 2.52.0 From c31ff7f1780c0cc1e1a356f32ba84c94363aec79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]> Date: Thu, 11 Jun 2026 03:21:32 +0200 Subject: [PATCH 9/9] swscale/x86/hscale_fast_bilinear_simd: mark indirect branch targets with ENDBR --- 
libswscale/x86/hscale_fast_bilinear_simd.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libswscale/x86/hscale_fast_bilinear_simd.c b/libswscale/x86/hscale_fast_bilinear_simd.c index d8a4e444b4..cd11874349 100644 --- a/libswscale/x86/hscale_fast_bilinear_simd.c +++ b/libswscale/x86/hscale_fast_bilinear_simd.c @@ -131,6 +131,16 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, xpos = 0; // lumXInc/2 - 0x8000; // difference between pixel centers fragmentPos = 0; +#if ARCH_X86_64 || HAVE_I686 + // Add an endbr instruction at the beginning of the generated filter code + if (filterCode) { + filterCode[fragmentPos++] = 0xf3; + filterCode[fragmentPos++] = 0x0f; + filterCode[fragmentPos++] = 0x1e; + filterCode[fragmentPos++] = ARCH_X86_64 ? 0xfa : 0xfb; + } +#endif + for (i = 0; i < dstW / numSplits; i++) { int xx = xpos >> 16; -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
