When JITing an eBPF program on x86_64, also JIT a list_func that calls the
bpf_func in a loop.  Since this is a direct call, it should perform better
than indirect-calling bpf_func in a loop.

Since getting at the percpu 'redirect_info' variable from JITed code is
ugly and magic, pass a pointer to it into the list_func as a parameter
instead of calculating it inside the loop.  This is safe because BPF
execution is not preemptible, so the percpu variable can't get moved while
we're using it.
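
Semantically, the generated list_func matches the interpreter's list
runner, i.e. (a sketch only; bpf_func is baked into the generated code as
a direct call, and struct bpf_work comes from earlier in this series):

	static void list_func(struct list_head *list,
			      const struct bpf_insn *insn,
			      const struct redirect_info *percpu_ri)
	{
		struct bpf_work *work;

		/* Direct call into the JITed bpf_func per work item;
		 * snapshot the percpu redirect_info after each run.
		 */
		list_for_each_entry(work, list, list) {
			work->ret = bpf_func(work->ctx, insn);
			work->ri = *percpu_ri;
		}
	}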

Signed-off-by: Edward Cree <ec...@solarflare.com>
---
 arch/x86/net/bpf_jit_comp.c | 166 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/filter.h      |  19 +++--
 kernel/bpf/core.c           |  18 +++--
 net/core/dev.c              |   5 +-
 4 files changed, 197 insertions(+), 11 deletions(-)
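
try_do_jit_list() below hard-codes the x86_64 layout of struct bpf_work
(ctx at 0x10, ri at 0x18, ret at 0x38, size 0x40).  A build-time check
along these lines (a sketch, not part of this patch) would catch layout
drift:

	BUILD_BUG_ON(offsetof(struct bpf_work, ctx) != 0x10);
	BUILD_BUG_ON(offsetof(struct bpf_work, ri)  != 0x18);
	BUILD_BUG_ON(offsetof(struct bpf_work, ret) != 0x38);
	BUILD_BUG_ON(sizeof(struct bpf_work) != 0x40);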

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 2580cd2e98b1..3e06dd79adda 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1060,6 +1060,171 @@ struct x64_jit_data {
        struct jit_context ctx;
 };
 
+static void try_do_jit_list(struct bpf_prog *bpf_prog,
+                           unsigned int (*bpf_func)(const void *ctx,
+                                                    const struct bpf_insn *insn))
+{
+       /* list_func takes three arguments:
+        * struct list_head *list [RDI]
+        * const struct bpf_insn *insn [RSI]
+        * const struct redirect_info *percpu_ri [RDX]
+        *
+        * Layout of struct bpf_work on x86_64:
+        * struct list_head {
+        *      struct list_head *next; // 0x0
+        *      struct list_head *prev; // 0x8
+        * } list; // 0x0
+        * void *ctx; // 0x10
+        * struct redirect_info {
+        *      u32 ifindex; // 0x18
+        *      u32 flags; // 0x1c
+        *      struct bpf_map *map; // 0x20
+        *      struct bpf_map *map_to_flush; // 0x28
+        *      unsigned long   map_owner; // 0x30
+        * } ri; // 0x18
+        * unsigned long ret; // 0x38
+        * (total size 0x40 bytes)
+        *
+        * Desired function body:
+        * struct redirect_info *ri = percpu_ri; [R12]
+        * struct bpf_work *work; [RBP]
+        *
+        * list_for_each_entry(work, list, list) {
+        *      work->ret = (*bpf_func)(work->ctx, insn);
+        *      work->ri = *ri;
+        * }
+        *
+        * Assembly to emit:
+        * ; save CSRs
+        *      push %rbx
+        *      push %rbp
+        *      push %r12
+        * ; stash pointer to redirect_info
+        *      mov %rdx,%r12   ; ri = percpu_ri
+        * ; start list
+        *      mov %rdi,%rbp   ; head = list
+        * next:                ; while (true) {
+        *      mov (%rbp),%rbx ; rbx = head->next
+        *      cmp %rbx,%rdi   ; if (rbx == list)
+        *      je out          ;       break
+        *      mov %rbx,%rbp   ; head = rbx
+        *      push %rdi       ; save list
+        *      push %rsi       ; save insn (is still arg2)
+        * ; struct bpf_work *work = head (container_of, but list is first member)
+        *      mov 0x10(%rbp),%rdi; arg1 = work->ctx
+        *      callq bpf_func  ; rax = (*bpf_func)(work->ctx, insn)
+        *      mov %rax,0x38(%rbp); work->ret = rax
+        * ; work->ri = *ri
+        *      mov (%r12),%rdx
+        *      mov %rdx,0x18(%rbp)
+        *      mov 0x8(%r12),%rdx
+        *      mov %rdx,0x20(%rbp)
+        *      mov 0x10(%r12),%rdx
+        *      mov %rdx,0x28(%rbp)
+        *      mov 0x18(%r12),%rdx
+        *      mov %rdx,0x30(%rbp)
+        *      pop %rsi        ; restore insn
+        *      pop %rdi        ; restore list
+        *      jmp next        ; }
+        * out:
+        * ; restore CSRs and return
+        *      pop %r12
+        *      pop %rbp
+        *      pop %rbx
+        *      retq
+        */
+       u8 *image, *prog, *from_to_out, *next;
+       struct bpf_binary_header *header;
+       int off, cnt = 0;
+       s64 jmp_offset;
+
+       /* Prog should be 81 bytes; let's round up to 128 */
+       header = bpf_jit_binary_alloc(128, &image, 1, jit_fill_hole);
+       if (!header)
+               return;
+       prog = image;
+
+       /* push rbx */
+       EMIT1(0x53);
+       /* push rbp */
+       EMIT1(0x55);
+       /* push %r12 */
+       EMIT2(0x41, 0x54);
+       /* mov %rdx,%r12 */
+       EMIT3(0x49, 0x89, 0xd4);
+       /* mov %rdi,%rbp */
+       EMIT3(0x48, 0x89, 0xfd);
+       next = prog;
+       /* mov 0x0(%rbp),%rbx */
+       EMIT4(0x48, 0x8b, 0x5d, 0x00);
+       /* cmp %rbx,%rdi */
+       EMIT3(0x48, 0x39, 0xdf);
+       /* je out */
+       EMIT2(X86_JE, 0);
+       from_to_out = prog; /* record . to patch this jump later */
+       /* mov %rbx,%rbp */
+       EMIT3(0x48, 0x89, 0xdd);
+       /* push %rdi */
+       EMIT1(0x57);
+       /* push %rsi */
+       EMIT1(0x56);
+       /* mov 0x10(%rbp),%rdi */
+       EMIT4(0x48, 0x8b, 0x7d, 0x10);
+       /* e8 callq bpf_func */
+       jmp_offset = (u8 *)bpf_func - (prog + 5);
+       if (!is_simm32(jmp_offset)) {
+               pr_err("call out of range to BPF func %p from list image %p\n",
+                      bpf_func, image);
+               goto fail;
+       }
+       EMIT1_off32(0xE8, jmp_offset);
+       /* mov %rax,0x38(%rbp) */
+       EMIT4(0x48, 0x89, 0x45, 0x38);
+       /* mov (%r12),%rdx */
+       EMIT4(0x49, 0x8b, 0x14, 0x24);
+       /* mov %rdx,0x18(%rbp) */
+       EMIT4(0x48, 0x89, 0x55, 0x18);
+       for (off = 0x8; off < 0x20; off += 0x8) {
+               /* mov off(%r12),%rdx */
+               EMIT4(0x49, 0x8b, 0x54, 0x24);
+               EMIT1(off);
+               /* mov %rdx,0x18+off(%rbp) */
+               EMIT4(0x48, 0x89, 0x55, 0x18 + off);
+       }
+       /* pop %rsi */
+       EMIT1(0x5e);
+       /* pop %rdi */
+       EMIT1(0x5f);
+       /* jmp next */
+       jmp_offset = next - (prog + 2);
+       if (WARN_ON(!is_imm8(jmp_offset))) /* can't happen */
+               goto fail;
+       EMIT2(0xeb, jmp_offset);
+       /* out: */
+       jmp_offset = prog - from_to_out;
+       if (WARN_ON(!is_imm8(jmp_offset))) /* can't happen */
+               goto fail;
+       from_to_out[-1] = jmp_offset;
+       /* pop %r12 */
+       EMIT2(0x41, 0x5c);
+       /* pop %rbp */
+       EMIT1(0x5d);
+       /* pop %rbx */
+       EMIT1(0x5b);
+       /* retq */
+       EMIT1(0xc3);
+       /* If we were wrong about how much space we needed, scream and shout */
+       WARN_ON(cnt != 81);
+       if (bpf_jit_enable > 1)
+               bpf_jit_dump(0, cnt, 0, image);
+       bpf_jit_binary_lock_ro(header);
+       bpf_prog->list_func = (void *)image;
+       bpf_prog->jited_list = 1;
+       return;
+fail:
+       bpf_jit_binary_free(header);
+}
+
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
        struct bpf_binary_header *header = NULL;
@@ -1176,6 +1341,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                prog->bpf_func = (void *)image;
                prog->jited = 1;
                prog->jited_len = proglen;
+               try_do_jit_list(prog, prog->bpf_func);
        } else {
                prog = orig_prog;
        }
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7d813034e286..ad1e75bf0991 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -517,7 +517,8 @@ struct bpf_prog {
                                            const struct bpf_insn *insn);
        /* Takes a list of struct bpf_work */
        void                    (*list_func)(struct list_head *list,
-                                            const struct bpf_insn *insn);
+                                            const struct bpf_insn *insn,
+                                            const struct redirect_info *percpu_ri);
        /* Instructions for interpreter */
        union {
                struct sock_filter      insns[0];
@@ -532,7 +533,7 @@ struct sk_filter {
 };
 
 #define BPF_PROG_RUN(filter, ctx)  (*(filter)->bpf_func)(ctx, (filter)->insnsi)
-#define BPF_LIST_PROG_RUN(filter, list) (*(filter)->list_func)(list, (filter)->insnsi)
+#define BPF_LIST_PROG_RUN(filter, list, percpu) (*(filter)->list_func)(list, (filter)->insnsi, percpu)
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
 
@@ -638,10 +639,11 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
 }
 
 static __always_inline void bpf_list_prog_run_xdp(const struct bpf_prog *prog,
-                                                 struct list_head *list)
+                                                 struct list_head *list,
+                                                 const struct redirect_info *percpu_ri)
 {
        /* Caller must hold rcu_read_lock(), as per bpf_prog_run_xdp(). */
-       BPF_LIST_PROG_RUN(prog, list);
+       BPF_LIST_PROG_RUN(prog, list, percpu_ri);
 }
 
 static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
@@ -756,6 +758,15 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
        return (void *)addr;
 }
 
+static inline struct bpf_binary_header *
+bpf_list_jit_binary_hdr(const struct bpf_prog *fp)
+{
+       unsigned long real_start = (unsigned long)fp->list_func;
+       unsigned long addr = real_start & PAGE_MASK;
+
+       return (void *)addr;
+}
+
 #ifdef CONFIG_ARCH_HAS_SET_MEMORY
 static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
 {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c35da826cc3b..028be88c4af8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -621,15 +621,22 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr)
  */
 void __weak bpf_jit_free(struct bpf_prog *fp)
 {
-       if (fp->jited) {
-               struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
+       struct bpf_binary_header *hdr;
 
+       if (fp->jited) {
+               hdr = bpf_jit_binary_hdr(fp);
                bpf_jit_binary_unlock_ro(hdr);
                bpf_jit_binary_free(hdr);
 
                WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
        }
 
+       if (fp->jited_list) {
+               hdr = bpf_list_jit_binary_hdr(fp);
+               bpf_jit_binary_unlock_ro(hdr);
+               bpf_jit_binary_free(hdr);
+       }
+
        bpf_prog_unlock_free(fp);
 }
 
@@ -1358,13 +1365,13 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
 
 #define LIST_PROG_NAME(stack_size) __bpf_list_prog_run##stack_size
 #define DEFINE_BPF_LIST_PROG_RUN(stack_size) \
-static void LIST_PROG_NAME(stack_size)(struct list_head *list, const struct bpf_insn *insn) \
+static void LIST_PROG_NAME(stack_size)(struct list_head *list, const struct bpf_insn *insn, const struct redirect_info *percpu_ri) \
 { \
        struct bpf_work *work; \
 \
        list_for_each_entry(work, list, list) { \
                work->ret = PROG_NAME(stack_size)(work->ctx, insn); \
-               work->ri = *this_cpu_ptr(&redirect_info); \
+               work->ri = *percpu_ri; \
        } \
 }
 
@@ -1398,7 +1405,8 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
 #undef PROG_NAME_LIST
 #define PROG_NAME_LIST(stack_size) LIST_PROG_NAME(stack_size),
 static void (*list_interpreters[])(struct list_head *list,
-                                  const struct bpf_insn *insn) = {
+                                  const struct bpf_insn *insn,
+                                  const struct redirect_info *percpu_ri) = {
 EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
 EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
 EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
diff --git a/net/core/dev.c b/net/core/dev.c
index 746112c22afd..7c1879045ef8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4214,6 +4214,7 @@ static void do_xdp_list_generic(struct bpf_prog *xdp_prog,
                                struct sk_buff_head *list,
                                struct sk_buff_head *pass_list)
 {
+       const struct redirect_info *percpu_ri = this_cpu_ptr(&redirect_info);
        struct xdp_work (*xwa)[NAPI_POLL_WEIGHT], *xw;
        struct bpf_work *bw;
        struct sk_buff *skb;
@@ -4249,11 +4250,11 @@ static void do_xdp_list_generic(struct bpf_prog *xdp_prog,
 
        if (xdp_prog->list_func && (xdp_prog->jited_list ||
                                    !xdp_prog->jited))
-               bpf_list_prog_run_xdp(xdp_prog, &xdp_list);
+               bpf_list_prog_run_xdp(xdp_prog, &xdp_list, percpu_ri);
        else
                list_for_each_entry(bw, &xdp_list, list) {
                        bw->ret = bpf_prog_run_xdp(xdp_prog, bw->ctx);
-                       bw->ri = *this_cpu_ptr(&redirect_info);
+                       bw->ri = *percpu_ri;
                }
 
        for (i = 0; i < n; i++) {
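
For reference, the forward-jump patching in try_do_jit_list() (the
X86_JE placeholder fixed up later via from_to_out[-1]) works because x86
rel8 displacements are measured from the end of the jump instruction,
and from_to_out is recorded immediately after emitting the 2-byte je.
A standalone sketch of the idiom (userspace illustration, not kernel
code):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint8_t image[16], *prog = image, *from_to_out, *out;

		*prog++ = 0x74;		/* je ...           */
		*prog++ = 0x00;		/* rel8 placeholder */
		from_to_out = prog;	/* displacement base */

		*prog++ = 0x90;		/* body the je skips */
		*prog++ = 0x90;

		out = prog;		/* out: */
		*prog++ = 0xc3;		/* retq */

		/* rel8 = target - end of the je instruction */
		from_to_out[-1] = (uint8_t)(out - from_to_out);
		printf("rel8 = %d\n", from_to_out[-1]);	/* prints 2 */
		return 0;
	}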
