Since LD_ABS/LD_IND instructions are now removed from the core and
reimplemented through a combination of inlined BPF instructions and
a slow-path helper, we can get rid of the complexity from the x64 JIT.
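
For reference, the per-packet work that previously lived in bpf_jit.S now
happens generically before the JIT ever sees the program. The C sketch
below only illustrates the fast/slow split for a word-sized LD_ABS
(negative SKF_* offsets are left out for brevity); the function name and
the simplified abort-with-0 handling are illustrative, not the kernel's
actual rewrite helper:

/*
 * Illustrative sketch only: models the fast/slow path split that the
 * rewritten LD_ABS | BPF_W load performs, outside of the JIT.
 */
static u32 ld_abs_word_sketch(const struct sk_buff *skb, u32 off)
{
	u32 headlen = skb->len - skb->data_len;	/* linear data length */
	__be32 val;

	if (off + sizeof(val) <= headlen) {
		/* fast path: bytes are in the linear skb area */
		memcpy(&val, skb->data + off, sizeof(val));
	} else if (skb_copy_bits(skb, off, &val, sizeof(val)) < 0) {
		/* slow path failed: classic BPF aborts the filter with 0,
		 * simplified here to returning 0 from the load
		 */
		return 0;
	}
	return ntohl(val);	/* LD_ABS loads are network byte order */
}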

Signed-off-by: Daniel Borkmann <dan...@iogearbox.net>
Acked-by: Alexei Starovoitov <a...@kernel.org>
---
 arch/x86/net/Makefile       |   3 +-
 arch/x86/net/bpf_jit.S      | 154 --------------------------------------------
 arch/x86/net/bpf_jit_comp.c | 144 ++---------------------------------------
 3 files changed, 5 insertions(+), 296 deletions(-)
 delete mode 100644 arch/x86/net/bpf_jit.S

diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
index c6b464a..59e123d 100644
--- a/arch/x86/net/Makefile
+++ b/arch/x86/net/Makefile
@@ -5,6 +5,5 @@
 ifeq ($(CONFIG_X86_32),y)
         obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
 else
-        OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
-        obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
+        obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
 endif
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
deleted file mode 100644
index b33093f..0000000
--- a/arch/x86/net/bpf_jit.S
+++ /dev/null
@@ -1,154 +0,0 @@
-/* bpf_jit.S : BPF JIT helper functions
- *
- * Copyright (C) 2011 Eric Dumazet (eric.duma...@gmail.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-/*
- * Calling convention :
- * rbx : skb pointer (callee saved)
- * esi : offset of byte(s) to fetch in skb (can be scratched)
- * r10 : copy of skb->data
- * r9d : hlen = skb->len - skb->data_len
- */
-#define SKBDATA        %r10
-#define SKF_MAX_NEG_OFF    $(-0x200000) /* SKF_LL_OFF from filter.h */
-
-#define FUNC(name) \
-       .globl name; \
-       .type name, @function; \
-       name:
-
-FUNC(sk_load_word)
-       test    %esi,%esi
-       js      bpf_slow_path_word_neg
-
-FUNC(sk_load_word_positive_offset)
-       mov     %r9d,%eax               # hlen
-       sub     %esi,%eax               # hlen - offset
-       cmp     $3,%eax
-       jle     bpf_slow_path_word
-       mov     (SKBDATA,%rsi),%eax
-       bswap   %eax                    /* ntohl() */
-       ret
-
-FUNC(sk_load_half)
-       test    %esi,%esi
-       js      bpf_slow_path_half_neg
-
-FUNC(sk_load_half_positive_offset)
-       mov     %r9d,%eax
-       sub     %esi,%eax               #       hlen - offset
-       cmp     $1,%eax
-       jle     bpf_slow_path_half
-       movzwl  (SKBDATA,%rsi),%eax
-       rol     $8,%ax                  # ntohs()
-       ret
-
-FUNC(sk_load_byte)
-       test    %esi,%esi
-       js      bpf_slow_path_byte_neg
-
-FUNC(sk_load_byte_positive_offset)
-       cmp     %esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
-       jle     bpf_slow_path_byte
-       movzbl  (SKBDATA,%rsi),%eax
-       ret
-
-/* rsi contains offset and can be scratched */
-#define bpf_slow_path_common(LEN)              \
-       lea     32(%rbp), %rdx;\
-       FRAME_BEGIN;                            \
-       mov     %rbx, %rdi; /* arg1 == skb */   \
-       push    %r9;                            \
-       push    SKBDATA;                        \
-/* rsi already has offset */                   \
-       mov     $LEN,%ecx;      /* len */       \
-       call    skb_copy_bits;                  \
-       test    %eax,%eax;                      \
-       pop     SKBDATA;                        \
-       pop     %r9;                            \
-       FRAME_END
-
-
-bpf_slow_path_word:
-       bpf_slow_path_common(4)
-       js      bpf_error
-       mov     32(%rbp),%eax
-       bswap   %eax
-       ret
-
-bpf_slow_path_half:
-       bpf_slow_path_common(2)
-       js      bpf_error
-       mov     32(%rbp),%ax
-       rol     $8,%ax
-       movzwl  %ax,%eax
-       ret
-
-bpf_slow_path_byte:
-       bpf_slow_path_common(1)
-       js      bpf_error
-       movzbl  32(%rbp),%eax
-       ret
-
-#define sk_negative_common(SIZE)                               \
-       FRAME_BEGIN;                                            \
-       mov     %rbx, %rdi; /* arg1 == skb */                   \
-       push    %r9;                                            \
-       push    SKBDATA;                                        \
-/* rsi already has offset */                                   \
-       mov     $SIZE,%edx;     /* size */                      \
-       call    bpf_internal_load_pointer_neg_helper;           \
-       test    %rax,%rax;                                      \
-       pop     SKBDATA;                                        \
-       pop     %r9;                                            \
-       FRAME_END;                                              \
-       jz      bpf_error
-
-bpf_slow_path_word_neg:
-       cmp     SKF_MAX_NEG_OFF, %esi   /* test range */
-       jl      bpf_error       /* offset lower -> error  */
-
-FUNC(sk_load_word_negative_offset)
-       sk_negative_common(4)
-       mov     (%rax), %eax
-       bswap   %eax
-       ret
-
-bpf_slow_path_half_neg:
-       cmp     SKF_MAX_NEG_OFF, %esi
-       jl      bpf_error
-
-FUNC(sk_load_half_negative_offset)
-       sk_negative_common(2)
-       mov     (%rax),%ax
-       rol     $8,%ax
-       movzwl  %ax,%eax
-       ret
-
-bpf_slow_path_byte_neg:
-       cmp     SKF_MAX_NEG_OFF, %esi
-       jl      bpf_error
-
-FUNC(sk_load_byte_negative_offset)
-       sk_negative_common(1)
-       movzbl  (%rax), %eax
-       ret
-
-bpf_error:
-# force a return 0 from jit handler
-       xor     %eax,%eax
-       mov     (%rbp),%rbx
-       mov     8(%rbp),%r13
-       mov     16(%rbp),%r14
-       mov     24(%rbp),%r15
-       add     $40, %rbp
-       leaveq
-       ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 1c3c81d..ce08b7b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -17,15 +17,6 @@
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
 
-/*
- * Assembly code in arch/x86/net/bpf_jit.S
- */
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
-extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
-extern u8 sk_load_byte_positive_offset[];
-extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
-extern u8 sk_load_byte_negative_offset[];
-
 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
        if (len == 1)
@@ -107,9 +98,6 @@ static int bpf_size_to_x86_bytes(int bpf_size)
 #define X86_JLE 0x7E
 #define X86_JG  0x7F
 
-#define CHOOSE_LOAD_FUNC(K, func) \
-       ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-
 /* Pick a register outside of BPF range for JIT internal work */
 #define AUX_REG (MAX_BPF_JIT_REG + 1)
 
@@ -120,8 +108,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
  * register in load/store instructions, it always needs an
  * extra byte of encoding and is callee saved.
  *
- * R9  caches skb->len - skb->data_len
- * R10 caches skb->data, and used for blinding (if enabled)
+ * Also x86-64 register R9 is unused. x86-64 register R10 is
+ * used for blinding (if enabled).
  */
 static const int reg2hex[] = {
        [BPF_REG_0] = 0,  /* RAX */
@@ -196,19 +184,15 @@ static void jit_fill_hole(void *area, unsigned int size)
 
 struct jit_context {
        int cleanup_addr; /* Epilogue code offset */
-       bool seen_ld_abs;
-       bool seen_ax_reg;
 };
 
 /* Maximum number of bytes emitted while JITing one eBPF insn */
 #define BPF_MAX_INSN_SIZE      128
 #define BPF_INSN_SAFETY                64
 
-#define AUX_STACK_SPACE \
-       (32 /* Space for RBX, R13, R14, R15 */ + \
-         8 /* Space for skb_copy_bits() buffer */)
+#define AUX_STACK_SPACE                40 /* Space for RBX, R13, R14, R15, tailcnt */
 
-#define PROLOGUE_SIZE 37
+#define PROLOGUE_SIZE          37
 
 /*
  * Emit x86-64 prologue code for BPF program and check its size.
@@ -232,20 +216,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
        /* sub rbp, AUX_STACK_SPACE */
        EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
 
-       /* All classic BPF filters use R6(rbx) save it */
-
        /* mov qword ptr [rbp+0],rbx */
        EMIT4(0x48, 0x89, 0x5D, 0);
-
-       /*
-        * bpf_convert_filter() maps classic BPF register X to R7 and uses R8
-        * as temporary, so all tcpdump filters need to spill/fill R7(R13) and
-        * R8(R14). R9(R15) spill could be made conditional, but there is only
-        * one 'bpf_error' return path out of helper functions inside bpf_jit.S
-        * The overhead of extra spill is negligible for any filter other
-        * than synthetic ones. Therefore not worth adding complexity.
-        */
-
        /* mov qword ptr [rbp+8],r13 */
        EMIT4(0x4C, 0x89, 0x6D, 8);
        /* mov qword ptr [rbp+16],r14 */
@@ -353,27 +325,6 @@ static void emit_bpf_tail_call(u8 **pprog)
        *pprog = prog;
 }
 
-
-static void emit_load_skb_data_hlen(u8 **pprog)
-{
-       u8 *prog = *pprog;
-       int cnt = 0;
-
-       /*
-        * r9d = skb->len - skb->data_len (headlen)
-        * r10 = skb->data
-        */
-       /* mov %r9d, off32(%rdi) */
-       EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));
-
-       /* sub %r9d, off32(%rdi) */
-       EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));
-
-       /* mov %r10, off32(%rdi) */
-       EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));
-       *pprog = prog;
-}
-
 static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
                           u32 dst_reg, const u32 imm32)
 {
@@ -462,8 +413,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 {
        struct bpf_insn *insn = bpf_prog->insnsi;
        int insn_cnt = bpf_prog->len;
-       bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
-       bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0);
        bool seen_exit = false;
        u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
        int i, cnt = 0;
@@ -473,9 +422,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
        emit_prologue(&prog, bpf_prog->aux->stack_depth,
                      bpf_prog_was_classic(bpf_prog));
 
-       if (seen_ld_abs)
-               emit_load_skb_data_hlen(&prog);
-
        for (i = 0; i < insn_cnt; i++, insn++) {
                const s32 imm32 = insn->imm;
                u32 dst_reg = insn->dst_reg;
@@ -483,13 +429,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                u8 b2 = 0, b3 = 0;
                s64 jmp_offset;
                u8 jmp_cond;
-               bool reload_skb_data;
                int ilen;
                u8 *func;
 
-               if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
-                       ctx->seen_ax_reg = seen_ax_reg = true;
-
                switch (insn->code) {
                        /* ALU */
                case BPF_ALU | BPF_ADD | BPF_X:
@@ -916,36 +858,12 @@ xadd:                     if (is_imm8(insn->off))
                case BPF_JMP | BPF_CALL:
                        func = (u8 *) __bpf_call_base + imm32;
                        jmp_offset = func - (image + addrs[i]);
-                       if (seen_ld_abs) {
-                               reload_skb_data = bpf_helper_changes_pkt_data(func);
-                               if (reload_skb_data) {
-                                       EMIT1(0x57); /* push %rdi */
-                                       jmp_offset += 22; /* pop, mov, sub, mov */
-                               } else {
-                                       EMIT2(0x41, 0x52); /* push %r10 */
-                                       EMIT2(0x41, 0x51); /* push %r9 */
-                                       /*
-                                        * We need to adjust jmp offset, since
-                                        * pop %r9, pop %r10 take 4 bytes after call insn
-                                        */
-                                       jmp_offset += 4;
-                               }
-                       }
                        if (!imm32 || !is_simm32(jmp_offset)) {
                                pr_err("unsupported BPF func %d addr %p image 
%p\n",
                                       imm32, func, image);
                                return -EINVAL;
                        }
                        EMIT1_off32(0xE8, jmp_offset);
-                       if (seen_ld_abs) {
-                               if (reload_skb_data) {
-                                       EMIT1(0x5F); /* pop %rdi */
-                                       emit_load_skb_data_hlen(&prog);
-                               } else {
-                                       EMIT2(0x41, 0x59); /* pop %r9 */
-                                       EMIT2(0x41, 0x5A); /* pop %r10 */
-                               }
-                       }
                        break;
 
                case BPF_JMP | BPF_TAIL_CALL:
@@ -1080,60 +998,6 @@ xadd:                     if (is_imm8(insn->off))
                        }
                        break;
 
-               case BPF_LD | BPF_IND | BPF_W:
-                       func = sk_load_word;
-                       goto common_load;
-               case BPF_LD | BPF_ABS | BPF_W:
-                       func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
-common_load:
-                       ctx->seen_ld_abs = seen_ld_abs = true;
-                       jmp_offset = func - (image + addrs[i]);
-                       if (!func || !is_simm32(jmp_offset)) {
-                               pr_err("unsupported BPF func %d addr %p image 
%p\n",
-                                      imm32, func, image);
-                               return -EINVAL;
-                       }
-                       if (BPF_MODE(insn->code) == BPF_ABS) {
-                               /* mov %esi, imm32 */
-                               EMIT1_off32(0xBE, imm32);
-                       } else {
-                               /* mov %rsi, src_reg */
-                               EMIT_mov(BPF_REG_2, src_reg);
-                               if (imm32) {
-                                       if (is_imm8(imm32))
-                                               /* add %esi, imm8 */
-                                               EMIT3(0x83, 0xC6, imm32);
-                                       else
-                                               /* add %esi, imm32 */
-                                               EMIT2_off32(0x81, 0xC6, imm32);
-                               }
-                       }
-                       /*
-                        * skb pointer is in R6 (%rbx), it will be copied into
-                        * %rdi if skb_copy_bits() call is necessary.
-                        * sk_load_* helpers also use %r10 and %r9d.
-                        * See bpf_jit.S
-                        */
-                       if (seen_ax_reg)
-                               /* r10 = skb->data, mov %r10, off32(%rbx) */
-                               EMIT3_off32(0x4c, 0x8b, 0x93,
-                                           offsetof(struct sk_buff, data));
-                       EMIT1_off32(0xE8, jmp_offset); /* call */
-                       break;
-
-               case BPF_LD | BPF_IND | BPF_H:
-                       func = sk_load_half;
-                       goto common_load;
-               case BPF_LD | BPF_ABS | BPF_H:
-                       func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
-                       goto common_load;
-               case BPF_LD | BPF_IND | BPF_B:
-                       func = sk_load_byte;
-                       goto common_load;
-               case BPF_LD | BPF_ABS | BPF_B:
-                       func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
-                       goto common_load;
-
                case BPF_JMP | BPF_EXIT:
                        if (seen_exit) {
                                jmp_offset = ctx->cleanup_addr - addrs[i];
-- 
2.9.5
