On 02/04/26 11:12 am, Saket Kumar Bhaskar wrote:
When the verifier sees a timed may_goto instruction, it emits a call to
arch_bpf_timed_may_goto() with a stack offset in BPF_REG_AX
(powerpc64 R12) and expects the refreshed count value to be returned
in the same register. The verifier doesn't save or restore any registers
before emitting this call.

arch_bpf_timed_may_goto() should act as a trampoline to call
bpf_check_timed_may_goto() with powerpc64 ELF ABI calling convention.

To support this custom calling convention, implement
arch_bpf_timed_may_goto() in assembly and make sure the BPF caller-saved
registers are preserved, then call bpf_check_timed_may_goto() with
the powerpc64 ABI calling convention, where the first argument and the
return value are both in R3. Finally, move the result back into
BPF_REG_AX (R12) before returning.

Also, introduce bpf_jit_emit_func_call(), which computes the offset from
kernel_toc_addr(), validates that the target is within range of the
kernel TOC, and emits the ADDIS/ADDI sequence to load the function address
before performing the indirect branch via MTCTR/BCTRL. The existing code in
bpf_jit_emit_func_call_rel() is refactored to use this function.

Signed-off-by: Saket Kumar Bhaskar <[email protected]>
---
  arch/powerpc/net/Makefile             |  2 +-
  arch/powerpc/net/bpf_jit_comp.c       |  5 +++
  arch/powerpc/net/bpf_jit_comp64.c     | 59 ++++++++++++++++++++++-----
  arch/powerpc/net/bpf_timed_may_goto.S | 57 ++++++++++++++++++++++++++
  4 files changed, 111 insertions(+), 12 deletions(-)
  create mode 100644 arch/powerpc/net/bpf_timed_may_goto.S

diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index 8e60af32e51e..204fc57ac56e 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -2,4 +2,4 @@
  #
  # Arch-specific network modules
  #
-obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o 
bpf_timed_may_goto.o

As this is currently only enabled for CONFIG_PPC64, bpf_timed_may_goto.o
should be compiled conditionally for PPC64:

ifdef CONFIG_PPC64
obj-$(CONFIG_BPF_JIT) += bpf_timed_may_goto.o
endif

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 50103b3794fb..9b2b456b0765 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -537,6 +537,11 @@ bool bpf_jit_supports_subprog_tailcalls(void)
        return IS_ENABLED(CONFIG_PPC64);
  }
+bool bpf_jit_supports_timed_may_goto(void)
+{
+       return IS_ENABLED(CONFIG_PPC64);
+}
+
  bool bpf_jit_supports_kfunc_call(void)
  {
        return IS_ENABLED(CONFIG_PPC64);
diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index db364d9083e7..d39241444cd9 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -451,10 +451,28 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *, u64, 
u64, u64), void *cookie
        }
  }
+static int bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func_addr, int reg)
+{

+       long reladdr;
+
+       reladdr = func_addr - kernel_toc_addr();

long reladdr = func_addr - kernel_toc_addr();

+       if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+               pr_err("eBPF: address of %ps out of range of kernel_toc.\n", 
(void *)func_addr);
+               return -ERANGE;
+       }
+
+       EMIT(PPC_RAW_ADDIS(reg, _R2, PPC_HA(reladdr)));
+       EMIT(PPC_RAW_ADDI(reg, reg, PPC_LO(reladdr)));
+       EMIT(PPC_RAW_MTCTR(reg));
+       EMIT(PPC_RAW_BCTRL());
+
+       return 0;
+}
+
  int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct 
codegen_context *ctx, u64 func)
  {
        unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0;
-       long reladdr;
+       int ret;
/* bpf to bpf call, func is not known in the initial pass. Emit 5 nops as a placeholder */
        if (!func) {
@@ -469,6 +487,7 @@ int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, 
struct codegen_context *
        }
#ifdef CONFIG_PPC_KERNEL_PCREL

+       long reladdr;

While this works with modern C standards, I would be conservative and keep
this declaration at the start of the function itself.

        reladdr = func_addr - local_paca->kernelbase;
/*
@@ -507,16 +526,9 @@ int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, 
struct codegen_context *
        EMIT(PPC_RAW_BCTRL());
  #else
        if (core_kernel_text(func_addr)) {
-               reladdr = func_addr - kernel_toc_addr();
-               if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
-                       pr_err("eBPF: address of %ps out of range of 
kernel_toc.\n", (void *)func);
-                       return -ERANGE;
-               }
-
-               EMIT(PPC_RAW_ADDIS(_R12, _R2, PPC_HA(reladdr)));
-               EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr)));
-               EMIT(PPC_RAW_MTCTR(_R12));
-               EMIT(PPC_RAW_BCTRL());
+               ret = bpf_jit_emit_func_call(image, ctx, func_addr, _R12);
+               if (ret)
+                       return ret;
        } else {
                if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1)) {
                        /* func points to the function descriptor */
@@ -1755,6 +1767,31 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
u32 *fimage, struct code
                        if (ret < 0)
                                return ret;
+ /*
+                        * Call to arch_bpf_timed_may_goto() is emitted by the
+                        * verifier and called with custom calling convention 
with
+                        * first argument and return value in BPF_REG_AX (_R12).
+                        *
+                        * The generic helper or bpf function call emission path
+                        * may use the same scratch register as BPF_REG_AX to
+                        * materialize the target address. This would clobber AX
+                        * and break timed may_goto semantics.
+                        *
+                        * Emit a minimal indirect call sequence here using a 
temp
+                        * register and skip the normal post-call return-value 
move.
+                        */
+
+                       if (func_addr == (u64)arch_bpf_timed_may_goto) {

+#ifdef CONFIG_PPC_KERNEL_PCREL
+                               PPC_LI_ADDR(tmp1_reg, func_addr);
+                               EMIT(PPC_RAW_MTCTR(tmp1_reg));
+                               EMIT(PPC_RAW_BCTRL());
+#else
+                               bpf_jit_emit_func_call(image, ctx, func_addr, 
tmp1_reg);

The error check is missing for the above function call.
Actually, how about:

        ret = 0;
        if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
                ret = bpf_jit_emit_func_call(image, ctx, func_addr, tmp1_reg);

        if (ret || IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
                PPC_LI_ADDR(tmp1_reg, func_addr);
                EMIT(PPC_RAW_MTCTR(tmp1_reg));
                EMIT(PPC_RAW_BCTRL());
        }

+#endif
+                               break;
+                       }
+
                        /* Take care of powerpc ABI requirements before kfunc 
call */
                        if (insn[i].src_reg == BPF_PSEUDO_KFUNC_CALL) {
                                if (prepare_for_kfunc_call(fp, image, ctx, 
&insn[i]))
diff --git a/arch/powerpc/net/bpf_timed_may_goto.S 
b/arch/powerpc/net/bpf_timed_may_goto.S
new file mode 100644
index 000000000000..0b9afe3cfa1f
--- /dev/null
+++ b/arch/powerpc/net/bpf_timed_may_goto.S
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2025 IBM Corporation, Saket Kumar Bhaskar 
<[email protected]> */
+
+#include <linux/linkage.h>
+#include <asm/ppc_asm.h>
+
+/*
+ * arch_bpf_timed_may_goto() trampoline for powerpc64
+ *
+ * Custom BPF convention (verifier/JIT):
+ *     - input:  stack offset in BPF_REG_AX (r12)
+ *     - output: updated count in BPF_REG_AX (r12)
+ *
+ * Call bpf_check_timed_may_goto(ptr) with normal powerpc64 ABI:
+ *     - r3 = ptr, return in r3
+ *
+ * Preserve BPF regs R0-R5 (mapping: r8, r3-r7).
+ */
+
+SYM_FUNC_START(arch_bpf_timed_may_goto)
+       /* Prologue: save LR, allocate frame */
+       mflr    r0
+       std     r0, 16(r1)
+       stdu    r1, -112(r1)
+
+       /* Save BPF registers R0 - R5 (r8, r3-r7) */

+       std     r3, 24(r1)

It may not matter much for this handwritten function, but can we use
offsets 32 and onwards instead, as 32 is the MIN_FRAME_SIZE on ABIv2?

+       std     r4, 32(r1)
+       std     r5, 40(r1)
+       std     r6, 48(r1)
+       std     r7, 56(r1)
+       std     r8, 64(r1)
+
+       /*
+        * r3 = BPF_REG_FP + BPF_REG_AX
+        * BPF_REG_FP is r31; BPF_REG_AX is r12 (stack offset in bytes).
+        */
+       add     r3, r31, r12
+       bl      bpf_check_timed_may_goto
+
+       /* Put return value back into AX */
+       mr      r12, r3
+
+       /* Restore BPF registers R0 - R5 (r8, r3-r7) */
+       ld      r3, 24(r1)
+       ld      r4, 32(r1)
+       ld      r5, 40(r1)
+       ld      r6, 48(r1)
+       ld      r7, 56(r1)
+       ld      r8, 64(r1)
+
+       /* Epilogue: pop frame, restore LR, return */
+       addi    r1, r1, 112
+       ld      r0, 16(r1)
+       mtlr    r0
+       blr
+SYM_FUNC_END(arch_bpf_timed_may_goto)

- Hari

Reply via email to