x86 tail-call fentry patching mirrors CALL text pokes to the tail-call
landing slot.

The helper that locates that mirrored slot assumes an ENDBR-prefixed
landing, which works on IBT JITs but fails on non-IBT JITs where the
landing starts directly with the 5-byte patch slot.

As a result, the regular entry gets patched but the tail-call landing
remains NOP5, so fentry never fires for tail-called programs on non-IBT
kernels.

Anchor the lookup on the landing address, verify the short-jump layout
first, and only check ENDBR when one is actually emitted.

Signed-off-by: Takeru Hayasaka <[email protected]>
---
 arch/x86/net/bpf_jit_comp.c | 47 ++++++++++++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e9b78040d703..fe5fd37f65d8 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -325,8 +325,10 @@ struct jit_context {
 
 /* Number of bytes emit_patch() needs to generate instructions */
 #define X86_PATCH_SIZE         5
+/* Number of bytes used by the short jump that skips the tail-call hook. */
+#define X86_TAIL_CALL_SKIP_JMP_SIZE    2
 /* Number of bytes that will be skipped on tailcall */
-#define X86_TAIL_CALL_OFFSET   (12 + ENDBR_INSN_SIZE)
+#define X86_TAIL_CALL_OFFSET   (12 + X86_TAIL_CALL_SKIP_JMP_SIZE + ENDBR_INSN_SIZE)
 
 static void push_r9(u8 **pprog)
 {
@@ -545,8 +547,15 @@ static void emit_prologue(u8 **pprog, u8 *ip, u32 stack_depth, bool ebpf_from_cb
                EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
        }
 
+       if (!is_subprog) {
+               /* Normal entry skips the tail-call-only trampoline hook. */
+               EMIT2(0xEB, ENDBR_INSN_SIZE + X86_PATCH_SIZE);
+       }
+
        /* X86_TAIL_CALL_OFFSET is here */
        EMIT_ENDBR();
+       if (!is_subprog)
+               emit_nops(&prog, X86_PATCH_SIZE);
 
        /* sub rsp, rounded_stack_depth */
        if (stack_depth)
@@ -632,12 +641,33 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
        return ret;
 }
 
+static void *bpf_tail_call_fentry_ip(void *ip)
+{
+       u8 *tail_ip = ip + X86_TAIL_CALL_OFFSET;
+       u8 *landing = tail_ip - ENDBR_INSN_SIZE;
+
+       /* ip points at the regular fentry slot after the entry ENDBR. */
+       if (landing[-X86_TAIL_CALL_SKIP_JMP_SIZE] != 0xEB ||
+           landing[-X86_TAIL_CALL_SKIP_JMP_SIZE + 1] !=
+                   ENDBR_INSN_SIZE + X86_PATCH_SIZE)
+               return NULL;
+
+       if (ENDBR_INSN_SIZE && !is_endbr((u32 *)landing))
+               return NULL;
+
+       return tail_ip;
+}
+
 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
                       enum bpf_text_poke_type new_t, void *old_addr,
                       void *new_addr)
 {
+       void *tail_ip = NULL;
+       bool is_bpf_text = is_bpf_text_address((long)ip);
+       int ret, tail_ret;
+
        if (!is_kernel_text((long)ip) &&
-           !is_bpf_text_address((long)ip))
+           !is_bpf_text)
                /* BPF poking in modules is not supported */
                return -EINVAL;
 
@@ -648,7 +678,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
        if (is_endbr(ip))
                ip += ENDBR_INSN_SIZE;
 
-       return __bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr);
+       if (is_bpf_text && (old_t == BPF_MOD_CALL || new_t == BPF_MOD_CALL))
+               tail_ip = bpf_tail_call_fentry_ip(ip);
+
+       ret = __bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr);
+       if (ret < 0 || !tail_ip)
+               return ret;
+
+       tail_ret = __bpf_arch_text_poke(tail_ip, old_t, new_t, old_addr, new_addr);
+       if (tail_ret < 0)
+               return tail_ret;
+
+       return ret && tail_ret;
 }
 
 #define EMIT_LFENCE()  EMIT3(0x0F, 0xAE, 0xE8)
-- 
2.43.0


Reply via email to