PR #23449 opened by Kacper Michajłow (kasper93)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23449
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23449.patch


From a3f2ae10069cbd09e6727e62555fc7c0dc72819c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]>
Date: Thu, 11 Jun 2026 03:16:13 +0200
Subject: [PATCH 1/9] avutil/x86/x86inc: add ENDBR define

Maps to endbr64/endbr32 depending on target arch, disabled on pre-i686.
---
 libavutil/x86/x86inc.asm | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 0e80ebed43..edd7d18f52 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -87,6 +87,29 @@
     %define FORCE_VEX_ENCODING 0
 %endif
 
+; Mark a valid target for indirect branches when Indirect Branch Tracking is
+; enabled. Defined empty on pre-i686, where endbr32 is not a valid instruction encoding.
+%if ARCH_X86_64 || HAVE_I686
+    %ifdef __NASM_VERSION_ID__
+        %if __NASM_VERSION_ID__ >= 0x020e0000 ; 2.14
+            %if ARCH_X86_64
+                %define ENDBR endbr64
+            %else
+                %define ENDBR endbr32
+            %endif
+        %endif
+    %endif
+    %ifndef ENDBR
+        %if ARCH_X86_64
+            %define ENDBR db 0xf3, 0x0f, 0x1e, 0xfa ; endbr64
+        %else
+            %define ENDBR db 0xf3, 0x0f, 0x1e, 0xfb ; endbr32
+        %endif
+    %endif
+%else
+    %define ENDBR
+%endif
+
 ; aout does not support align=
 ; NOTE: This section is out of sync with x264, in order to
 ; keep supporting OS/2.
-- 
2.52.0


From c99c41b30e7620554f6d5e59b0d968fe25cf3e8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]>
Date: Thu, 11 Jun 2026 03:22:51 +0200
Subject: [PATCH 2/9] avutil/x86/x86inc: emit ENDBR at function entry points

---
 libavutil/x86/x86inc.asm | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index edd7d18f52..5f63e1d364 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -872,6 +872,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, 
jng, jnge, ja, jae,
     %endif
     align function_align
     %2:
+    ENDBR
     RESET_MM_PERMUTATION        ; needed for x86-64, also makes disassembly somewhat nicer
     %xdefine rstk rsp           ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required
     %assign stack_offset 0      ; stack pointer offset relative to the return address
@@ -893,6 +894,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, 
jng, jnge, ja, jae,
         global current_function %+ %1
     %endif
     %1:
+    ENDBR
 %endmacro
 
 %macro cextern 1
-- 
2.52.0


From 13f577f509e9ece84e8351c2ac45260289014e9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]>
Date: Thu, 11 Jun 2026 03:23:16 +0200
Subject: [PATCH 3/9] avutil/x86/x86inc: mark objects as IBT compatible

---
 libavutil/x86/x86inc.asm | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 5f63e1d364..0bafbc8054 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -939,7 +939,8 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, 
jng, jnge, ja, jae,
                 db "GNU",0     ; n_name
                 dd 0xc0000002  ; pr_type = GNU_PROPERTY_X86_FEATURE_1_AND
                 dd 0x00000004  ; pr_datasz
-                dd 0x00000002  ; pr_data = GNU_PROPERTY_X86_FEATURE_1_SHSTK
+                dd 0x00000003  ; pr_data = GNU_PROPERTY_X86_FEATURE_1_IBT |
+                               ;           GNU_PROPERTY_X86_FEATURE_1_SHSTK
                 dd 0x00000000  ; pr_padding
             %endif
         %endif
-- 
2.52.0


From dafe014fc11e3a54e4f9903f42315768a8c9140d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]>
Date: Thu, 11 Jun 2026 03:14:58 +0200
Subject: [PATCH 4/9] avutil/x86/asm: add ENDBR define

Maps to endbr64/endbr32 depending on target arch, disabled on pre-i686.
---
 libavutil/x86/asm.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/libavutil/x86/asm.h b/libavutil/x86/asm.h
index f06ea25035..b51c0e393a 100644
--- a/libavutil/x86/asm.h
+++ b/libavutil/x86/asm.h
@@ -98,6 +98,16 @@ typedef int x86_reg;
 #    define XMM_CLOBBERS_ONLY(...)
 #endif
 
+/* Mark a valid target for indirect branches when Indirect Branch Tracking
+ * is enabled. Defined empty on pre-i686, where endbr32 is not a valid instruction encoding. */
+#if ARCH_X86_64
+#    define ENDBR ".byte 0xf3, 0x0f, 0x1e, 0xfa\n\t" /* endbr64 */
+#elif HAVE_I686
+#    define ENDBR ".byte 0xf3, 0x0f, 0x1e, 0xfb\n\t" /* endbr32 */
+#else
+#    define ENDBR
+#endif
+
 /* Use to export labels from asm. */
 #define LABEL_MANGLE(a) EXTERN_PREFIX #a
 
-- 
2.52.0


From c0945e187411da861c99eb744e87b7301d2538ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]>
Date: Thu, 11 Jun 2026 03:17:10 +0200
Subject: [PATCH 5/9] [TO BE UPSTREAMED] do not merge

---
 tests/checkasm/ext/src/x86/x86inc.asm | 31 ++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/tests/checkasm/ext/src/x86/x86inc.asm 
b/tests/checkasm/ext/src/x86/x86inc.asm
index ece93f1452..957ea98f98 100644
--- a/tests/checkasm/ext/src/x86/x86inc.asm
+++ b/tests/checkasm/ext/src/x86/x86inc.asm
@@ -97,6 +97,32 @@
     %define FORCE_VEX_ENCODING 0
 %endif
 
+; Mark a valid target for indirect branches when Indirect Branch Tracking is
+; enabled. Defined empty on pre-i686, where endbr32 is not a valid instruction encoding.
+%ifndef HAVE_I686
+    %define HAVE_I686 0
+%endif
+%if ARCH_X86_64 || HAVE_I686
+    %ifdef __NASM_VERSION_ID__
+        %if __NASM_VERSION_ID__ >= 0x020e0000 ; 2.14
+            %if ARCH_X86_64
+                %define ENDBR endbr64
+            %else
+                %define ENDBR endbr32
+            %endif
+        %endif
+    %endif
+    %ifndef ENDBR
+        %if ARCH_X86_64
+            %define ENDBR db 0xf3, 0x0f, 0x1e, 0xfa ; endbr64
+        %else
+            %define ENDBR db 0xf3, 0x0f, 0x1e, 0xfb ; endbr32
+        %endif
+    %endif
+%else
+    %define ENDBR
+%endif
+
 %macro SECTION_RODATA 0-1 16
     %ifidn __OUTPUT_FORMAT__,win32
         SECTION .rdata align=%1
@@ -861,6 +887,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, 
jng, jnge, ja, jae,
     %endif
     align function_align
     %2:
+    ENDBR
     RESET_MM_PERMUTATION        ; needed for x86-64, also makes disassembly somewhat nicer
     %xdefine rstk rsp           ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required
     %assign stack_offset 0      ; stack pointer offset relative to the return address
@@ -882,6 +909,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, 
jng, jnge, ja, jae,
         global current_function %+ %1
     %endif
     %1:
+    ENDBR
 %endmacro
 
 %macro cextern 1
@@ -926,7 +954,8 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, 
jng, jnge, ja, jae,
                 db "GNU",0     ; n_name
                 dd 0xc0000002  ; pr_type = GNU_PROPERTY_X86_FEATURE_1_AND
                 dd 0x00000004  ; pr_datasz
-                dd 0x00000002  ; pr_data = GNU_PROPERTY_X86_FEATURE_1_SHSTK
+                dd 0x00000003  ; pr_data = GNU_PROPERTY_X86_FEATURE_1_IBT |
+                               ;           GNU_PROPERTY_X86_FEATURE_1_SHSTK
                 dd 0x00000000  ; pr_padding
             %endif
         %endif
-- 
2.52.0


From c16319a861dc8e2c87baab8fc6b97143703f55b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]>
Date: Thu, 11 Jun 2026 03:18:31 +0200
Subject: [PATCH 6/9] avcodec/x86/mlpdsp_init: mark indirect branch targets
 with ENDBR

---
 libavcodec/x86/mlpdsp_init.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/x86/mlpdsp_init.c b/libavcodec/x86/mlpdsp_init.c
index 21a0e38143..5f95ef9423 100644
--- a/libavcodec/x86/mlpdsp_init.c
+++ b/libavcodec/x86/mlpdsp_init.c
@@ -76,6 +76,7 @@ static const void * const iirtable[5] = { &ff_mlp_iirorder_0, 
&ff_mlp_iirorder_1
 
 #define MLPMUL(label, offset, offs, offc)   \
     LABEL_MANGLE(label)":             \n\t" \
+    ENDBR                                   \
     "movslq "offset"+"offs"(%0), %%rax\n\t" \
     "movslq "offset"+"offc"(%1), %%rdx\n\t" \
     "imul                 %%rdx, %%rax\n\t" \
@@ -83,6 +84,7 @@ static const void * const iirtable[5] = { &ff_mlp_iirorder_0, 
&ff_mlp_iirorder_1
 
 #define FIRMULREG(label, offset, firc)\
     LABEL_MANGLE(label)":       \n\t" \
+    ENDBR                             \
     "movslq "#offset"(%0), %%rax\n\t" \
     "imul        %"#firc", %%rax\n\t" \
     "add            %%rax, %%rsi\n\t"
@@ -101,6 +103,7 @@ static const void * const iirtable[5] = { 
&ff_mlp_iirorder_0, &ff_mlp_iirorder_1
 
 #define MLPMUL(label, offset, offs, offc)  \
     LABEL_MANGLE(label)":            \n\t" \
+    ENDBR                                  \
     "mov   "offset"+"offs"(%0), %%eax\n\t" \
     "imull "offset"+"offc"(%1)       \n\t" \
     "add                %%eax , %%esi\n\t" \
@@ -155,12 +158,14 @@ static void mlp_filter_channel_x86(int32_t *state, const 
int32_t *coeff,
         FIRMUL   (ff_mlp_firorder_2, 0x04   )
         FIRMULREG(ff_mlp_firorder_1, 0x00, 8)
         LABEL_MANGLE(ff_mlp_firorder_0)":\n\t"
+        ENDBR
         "jmp  *%6                     \n\t"
         IIRMUL   (ff_mlp_iirorder_4, 0x0c   )
         IIRMUL   (ff_mlp_iirorder_3, 0x08   )
         IIRMUL   (ff_mlp_iirorder_2, 0x04   )
         IIRMUL   (ff_mlp_iirorder_1, 0x00   )
         LABEL_MANGLE(ff_mlp_iirorder_0)":\n\t"
+        ENDBR
         SHIFT_ACCUM
         "mov  "RESULT"  ,"ACCUM"      \n\t"
         "add  (%2)      ,"RESULT"     \n\t"
-- 
2.52.0


From 94499e71d1ddb833404cf26a45c0f6ad4aa3526b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]>
Date: Thu, 11 Jun 2026 03:20:28 +0200
Subject: [PATCH 7/9] avcodec/x86/vp9itxfm: mark indirect branch targets with
 ENDBR

---
 libavcodec/x86/vp9itxfm_16bpp_avx512.asm | 2 ++
 libavcodec/x86/vp9itxfm_avx2.asm         | 6 ++++++
 libavcodec/x86/vp9itxfm_avx512.asm       | 2 ++
 3 files changed, 10 insertions(+)

diff --git a/libavcodec/x86/vp9itxfm_16bpp_avx512.asm 
b/libavcodec/x86/vp9itxfm_16bpp_avx512.asm
index 1924233469..4d5dd4a129 100644
--- a/libavcodec/x86/vp9itxfm_16bpp_avx512.asm
+++ b/libavcodec/x86/vp9itxfm_16bpp_avx512.asm
@@ -375,6 +375,7 @@ cglobal vp9_idct_16x16_internal_10, 0, 7, 22, dst, stride, 
c, eob, tx2
     TRANSPOSE_4D          4,  5,  6,  7, 16
     jmp                tx2q
 .pass2:
+    ENDBR
     test               eobd, eobd
     jl .pass2_fast
     call .main_part1
@@ -603,6 +604,7 @@ cglobal vp9_iadst_16x16_internal_10, 0, 7, 22, dst, stride, 
c, eob, tx2
     WRAP_YMM IADST16_PASS1_END
     jmp m(vp9_idct_16x16_internal_10).pass1_fast_end
 .pass2:
+    ENDBR
     test               eobd, eobd
     jl .pass2_fast
     call .main_part1
diff --git a/libavcodec/x86/vp9itxfm_avx2.asm b/libavcodec/x86/vp9itxfm_avx2.asm
index c5ee2426e2..e632774713 100644
--- a/libavcodec/x86/vp9itxfm_avx2.asm
+++ b/libavcodec/x86/vp9itxfm_avx2.asm
@@ -336,6 +336,7 @@ cglobal vp9_idct_4x4_internal, 0, 5, 6, dst, stride, c, 
eob, tx2
     pshufb               m1, m3, m2
     jmp                tx2q
 .pass2:
+    ENDBR
     call .main
 .pass2_end:
     vpbroadcastd         m2, [o(pw_2048)]
@@ -382,6 +383,7 @@ cglobal vp9_iadst_4x4_internal, 0, 5, 6, dst, stride, c, 
eob, tx2
     call .main
     jmp m(vp9_idct_4x4_internal).pass1_end
 .pass2:
+    ENDBR
     call .main
     jmp m(vp9_idct_4x4_internal).pass2_end
 ALIGN function_align
@@ -481,6 +483,7 @@ cglobal vp9_idct_8x8_internal, 0, 5, 8, dst, stride, c, 
eob, tx2
     vperm2i128           m3, m5, m3, 0x31
     jmp                tx2q
 .pass2:
+    ENDBR
     call .main
     vpbroadcastd         m4, [o(pw_1024)]
     vpermq               m1, m1, q2031
@@ -553,6 +556,7 @@ cglobal vp9_iadst_8x8_internal, 0, 5, 8, dst, stride, c, 
eob, tx2
     vinserti128          m1, m4, xm1, 1
     jmp                tx2q
 .pass2:
+    ENDBR
     pshufd               m4, m0, q1032
     pshufd               m5, m1, q1032
     call .main
@@ -923,6 +927,7 @@ cglobal vp9_idct_16x16_internal, 0, 5, 16, 32*6, dst, 
stride, c, eob, tx2
     call .transpose_8x8
     jmp                tx2q
 .pass2:
+    ENDBR
     test               eobd, eobd
     jl .pass2_fast
     call .main
@@ -1039,6 +1044,7 @@ cglobal vp9_iadst_16x16_internal, 0, 5, 16, 32*6, dst, 
stride, c, eob, tx2
     mova                xm0, [rsp+32*0]
     jmp m(vp9_idct_16x16_internal).pass1_fast_end
 .pass2:
+    ENDBR
     test               eobd, eobd
     jl .pass2_fast
     call .main
diff --git a/libavcodec/x86/vp9itxfm_avx512.asm 
b/libavcodec/x86/vp9itxfm_avx512.asm
index d51c50756d..788a63f222 100644
--- a/libavcodec/x86/vp9itxfm_avx512.asm
+++ b/libavcodec/x86/vp9itxfm_avx512.asm
@@ -524,6 +524,7 @@ cglobal vp9_idct_16x16_internal, 0, 5, 16, dst, stride, c, 
eob, tx2
     punpckldq            m0, m4     ; 0-1
     jmp                tx2q
 .pass2:
+    ENDBR
     test               eobd, eobd
     jl .pass2_fast
     call .main
@@ -771,6 +772,7 @@ cglobal vp9_iadst_16x16_internal, 0, 5, 16, dst, stride, c, 
eob, tx2
     vpermt2q             m3, m5, m4
     jmp                tx2q
 .pass2:
+    ENDBR
     pshufd               m1, m1, q1032
     pshufd               m3, m3, q1032
     test               eobd, eobd
-- 
2.52.0


From f75f92d4cf88e7335574e28378a00b39b7f21db3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]>
Date: Thu, 11 Jun 2026 03:20:48 +0200
Subject: [PATCH 8/9] avcodec/x86/vvc/mc: mark indirect branch targets with
 ENDBR

---
 libavcodec/x86/vvc/mc.asm | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/libavcodec/x86/vvc/mc.asm b/libavcodec/x86/vvc/mc.asm
index 4f078ea8d0..2ccd20d45f 100644
--- a/libavcodec/x86/vvc/mc.asm
+++ b/libavcodec/x86/vvc/mc.asm
@@ -76,6 +76,7 @@ SECTION .text
 %if %3
 INIT_XMM cpuname
 .w2:
+    ENDBR
     movd                xm0, [src0q]
     pinsrd              xm0, [src0q + AVG_SRC_STRIDE], 1
     movd                xm1, [src1q]
@@ -85,6 +86,7 @@ INIT_XMM cpuname
     AVG_LOOP_END        .w2
 
 .w4:
+    ENDBR
     movq                xm0, [src0q]
     pinsrq              xm0, [src0q + AVG_SRC_STRIDE], 1
     movq                xm1, [src1q]
@@ -96,6 +98,7 @@ INIT_XMM cpuname
 
 INIT_YMM cpuname
 .w8:
+    ENDBR
     movu               xm0, [src0q]
     movu               xm1, [src1q]
     vinserti128         m0, m0, [src0q + AVG_SRC_STRIDE], 1
@@ -106,21 +109,25 @@ INIT_YMM cpuname
     AVG_LOOP_END       .w8
 
 .w16:
+    ENDBR
     AVG_W16_FN          %1, %2, 1
 
     AVG_LOOP_END       .w16
 
 .w32:
+    ENDBR
     AVG_W16_FN          %1, %2, 2
 
     AVG_LOOP_END       .w32
 
 .w64:
+    ENDBR
     AVG_W16_FN          %1, %2, 4
 
     AVG_LOOP_END       .w64
 
 .w128:
+    ENDBR
     AVG_W16_FN          %1, %2, 8
 
     AVG_LOOP_END       .w128
-- 
2.52.0


From c31ff7f1780c0cc1e1a356f32ba84c94363aec79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <[email protected]>
Date: Thu, 11 Jun 2026 03:21:32 +0200
Subject: [PATCH 9/9] swscale/x86/hscale_fast_bilinear_simd: mark indirect
 branch targets with ENDBR

---
 libswscale/x86/hscale_fast_bilinear_simd.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/libswscale/x86/hscale_fast_bilinear_simd.c 
b/libswscale/x86/hscale_fast_bilinear_simd.c
index d8a4e444b4..cd11874349 100644
--- a/libswscale/x86/hscale_fast_bilinear_simd.c
+++ b/libswscale/x86/hscale_fast_bilinear_simd.c
@@ -131,6 +131,16 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, 
uint8_t *filterCode,
     xpos        = 0; // lumXInc/2 - 0x8000; // difference between pixel centers
     fragmentPos = 0;
 
+#if ARCH_X86_64 || HAVE_I686
+    // Add an endbr instruction at the beginning of the generated code. Advance
+    // fragmentPos even without a buffer so a size-only pass counts these bytes.
+    if (filterCode) {
+        filterCode[0] = 0xf3; filterCode[1] = 0x0f;
+        filterCode[2] = 0x1e; filterCode[3] = ARCH_X86_64 ? 0xfa : 0xfb;
+    }
+    fragmentPos += 4;
+#endif
+
     for (i = 0; i < dstW / numSplits; i++) {
         int xx = xpos >> 16;
 
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to