Since the LD_ABS/LD_IND instructions are now removed from the core and
reimplemented through a combination of inlined BPF instructions and
a slow-path helper, we can remove the corresponding complexity from
the ppc64 JIT.

Signed-off-by: Daniel Borkmann <dan...@iogearbox.net>
Acked-by: Naveen N. Rao <naveen.n....@linux.vnet.ibm.com>
Acked-by: Alexei Starovoitov <a...@kernel.org>
Tested-by: Sandipan Das <sandi...@linux.vnet.ibm.com>
---
 arch/powerpc/net/Makefile         |   2 +-
 arch/powerpc/net/bpf_jit64.h      |  37 ++------
 arch/powerpc/net/bpf_jit_asm64.S  | 180 --------------------------------------
 arch/powerpc/net/bpf_jit_comp64.c | 109 +----------------------
 4 files changed, 11 insertions(+), 317 deletions(-)
 delete mode 100644 arch/powerpc/net/bpf_jit_asm64.S

diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index 02d369c..809f019 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -3,7 +3,7 @@
 # Arch-specific network modules
 #
 ifeq ($(CONFIG_PPC64),y)
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
 else
 obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
 endif
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 8bdef7e..3609be4 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -20,7 +20,7 @@
  * with our redzone usage.
  *
  *             [       prev sp         ] <-------------
- *             [   nv gpr save area    ] 8*8           |
+ *             [   nv gpr save area    ] 6*8           |
  *             [    tail_call_cnt      ] 8             |
  *             [    local_tmp_var      ] 8             |
  * fp (r31) -->        [   ebpf stack space    ] upto 512      |
@@ -28,8 +28,8 @@
  * sp (r1) --->        [    stack pointer      ] --------------
  */
 
-/* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */
-#define BPF_PPC_STACK_SAVE     (8*8)
+/* for gpr non volatile registers BPF_REG_6 to 10 */
+#define BPF_PPC_STACK_SAVE     (6*8)
 /* for bpf JIT code internal usage */
 #define BPF_PPC_STACK_LOCALS   16
 /* stack frame excluding BPF stack, ensure this is quadword aligned */
@@ -39,10 +39,8 @@
 #ifndef __ASSEMBLY__
 
 /* BPF register usage */
-#define SKB_HLEN_REG   (MAX_BPF_JIT_REG + 0)
-#define SKB_DATA_REG   (MAX_BPF_JIT_REG + 1)
-#define TMP_REG_1      (MAX_BPF_JIT_REG + 2)
-#define TMP_REG_2      (MAX_BPF_JIT_REG + 3)
+#define TMP_REG_1      (MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2      (MAX_BPF_JIT_REG + 1)
 
 /* BPF to ppc register mappings */
 static const int b2p[] = {
@@ -63,40 +61,23 @@ static const int b2p[] = {
        [BPF_REG_FP] = 31,
        /* eBPF jit internal registers */
        [BPF_REG_AX] = 2,
-       [SKB_HLEN_REG] = 25,
-       [SKB_DATA_REG] = 26,
        [TMP_REG_1] = 9,
        [TMP_REG_2] = 10
 };
 
-/* PPC NVR range -- update this if we ever use NVRs below r24 */
-#define BPF_PPC_NVR_MIN                24
-
-/* Assembly helpers */
-#define DECLARE_LOAD_FUNC(func)        u64 func(u64 r3, u64 r4);               \
-                               u64 func##_negative_offset(u64 r3, u64 r4);     \
-                               u64 func##_positive_offset(u64 r3, u64 r4);
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-
-#define CHOOSE_LOAD_FUNC(imm, func)                                            \
-                       (imm < 0 ?                                              \
-                       (imm >= SKF_LL_OFF ? func##_negative_offset : func) :   \
-                       func##_positive_offset)
+/* PPC NVR range -- update this if we ever use NVRs below r27 */
+#define BPF_PPC_NVR_MIN                27
 
 #define SEEN_FUNC      0x1000 /* might call external helpers */
 #define SEEN_STACK     0x2000 /* uses BPF stack */
-#define SEEN_SKB       0x4000 /* uses sk_buff */
-#define SEEN_TAILCALL  0x8000 /* uses tail calls */
+#define SEEN_TAILCALL  0x4000 /* uses tail calls */
 
 struct codegen_context {
        /*
         * This is used to track register usage as well
         * as calls to external helpers.
         * - register usage is tracked with corresponding
-        *   bits (r3-r10 and r25-r31)
+        *   bits (r3-r10 and r27-r31)
         * - rest of the bits can be used to track other
         *   things -- for now, we use bits 16 to 23
         *   encoded in SEEN_* macros above
diff --git a/arch/powerpc/net/bpf_jit_asm64.S b/arch/powerpc/net/bpf_jit_asm64.S
deleted file mode 100644
index 7e4c514..0000000
--- a/arch/powerpc/net/bpf_jit_asm64.S
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * bpf_jit_asm64.S: Packet/header access helper functions
- * for PPC64 BPF compiler.
- *
- * Copyright 2016, Naveen N. Rao <naveen.n....@linux.vnet.ibm.com>
- *                IBM Corporation
- *
- * Based on bpf_jit_asm.S by Matt Evans
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/ptrace.h>
-#include "bpf_jit64.h"
-
-/*
- * All of these routines are called directly from generated code,
- * with the below register usage:
- * r27         skb pointer (ctx)
- * r25         skb header length
- * r26         skb->data pointer
- * r4          offset
- *
- * Result is passed back in:
- * r8          data read in host endian format (accumulator)
- *
- * r9 is used as a temporary register
- */
-
-#define r_skb  r27
-#define r_hlen r25
-#define r_data r26
-#define r_off  r4
-#define r_val  r8
-#define r_tmp  r9
-
-_GLOBAL_TOC(sk_load_word)
-       cmpdi   r_off, 0
-       blt     bpf_slow_path_word_neg
-       b       sk_load_word_positive_offset
-
-_GLOBAL_TOC(sk_load_word_positive_offset)
-       /* Are we accessing past headlen? */
-       subi    r_tmp, r_hlen, 4
-       cmpd    r_tmp, r_off
-       blt     bpf_slow_path_word
-       /* Nope, just hitting the header.  cr0 here is eq or gt! */
-       LWZX_BE r_val, r_data, r_off
-       blr     /* Return success, cr0 != LT */
-
-_GLOBAL_TOC(sk_load_half)
-       cmpdi   r_off, 0
-       blt     bpf_slow_path_half_neg
-       b       sk_load_half_positive_offset
-
-_GLOBAL_TOC(sk_load_half_positive_offset)
-       subi    r_tmp, r_hlen, 2
-       cmpd    r_tmp, r_off
-       blt     bpf_slow_path_half
-       LHZX_BE r_val, r_data, r_off
-       blr
-
-_GLOBAL_TOC(sk_load_byte)
-       cmpdi   r_off, 0
-       blt     bpf_slow_path_byte_neg
-       b       sk_load_byte_positive_offset
-
-_GLOBAL_TOC(sk_load_byte_positive_offset)
-       cmpd    r_hlen, r_off
-       ble     bpf_slow_path_byte
-       lbzx    r_val, r_data, r_off
-       blr
-
-/*
- * Call out to skb_copy_bits:
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define bpf_slow_path_common(SIZE)                                     \
-       mflr    r0;                                                     \
-       std     r0, PPC_LR_STKOFF(r1);                                  \
-       stdu    r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1); \
-       mr      r3, r_skb;                                              \
-       /* r4 = r_off as passed */                                      \
-       addi    r5, r1, STACK_FRAME_MIN_SIZE;                           \
-       li      r6, SIZE;                                               \
-       bl      skb_copy_bits;                                          \
-       nop;                                                            \
-       /* save r5 */                                                   \
-       addi    r5, r1, STACK_FRAME_MIN_SIZE;                           \
-       /* r3 = 0 on success */                                         \
-       addi    r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS;    \
-       ld      r0, PPC_LR_STKOFF(r1);                                  \
-       mtlr    r0;                                                     \
-       cmpdi   r3, 0;                                                  \
-       blt     bpf_error;      /* cr0 = LT */
-
-bpf_slow_path_word:
-       bpf_slow_path_common(4)
-       /* Data value is on stack, and cr0 != LT */
-       LWZX_BE r_val, 0, r5
-       blr
-
-bpf_slow_path_half:
-       bpf_slow_path_common(2)
-       LHZX_BE r_val, 0, r5
-       blr
-
-bpf_slow_path_byte:
-       bpf_slow_path_common(1)
-       lbzx    r_val, 0, r5
-       blr
-
-/*
- * Call out to bpf_internal_load_pointer_neg_helper
- */
-#define sk_negative_common(SIZE)                               \
-       mflr    r0;                                             \
-       std     r0, PPC_LR_STKOFF(r1);                          \
-       stdu    r1, -STACK_FRAME_MIN_SIZE(r1);                  \
-       mr      r3, r_skb;                                      \
-       /* r4 = r_off, as passed */                             \
-       li      r5, SIZE;                                       \
-       bl      bpf_internal_load_pointer_neg_helper;           \
-       nop;                                                    \
-       addi    r1, r1, STACK_FRAME_MIN_SIZE;                   \
-       ld      r0, PPC_LR_STKOFF(r1);                          \
-       mtlr    r0;                                             \
-       /* R3 != 0 on success */                                \
-       cmpldi  r3, 0;                                          \
-       beq     bpf_error_slow; /* cr0 = EQ */
-
-bpf_slow_path_word_neg:
-       lis     r_tmp, -32      /* SKF_LL_OFF */
-       cmpd    r_off, r_tmp    /* addr < SKF_* */
-       blt     bpf_error       /* cr0 = LT */
-       b       sk_load_word_negative_offset
-
-_GLOBAL_TOC(sk_load_word_negative_offset)
-       sk_negative_common(4)
-       LWZX_BE r_val, 0, r3
-       blr
-
-bpf_slow_path_half_neg:
-       lis     r_tmp, -32      /* SKF_LL_OFF */
-       cmpd    r_off, r_tmp    /* addr < SKF_* */
-       blt     bpf_error       /* cr0 = LT */
-       b       sk_load_half_negative_offset
-
-_GLOBAL_TOC(sk_load_half_negative_offset)
-       sk_negative_common(2)
-       LHZX_BE r_val, 0, r3
-       blr
-
-bpf_slow_path_byte_neg:
-       lis     r_tmp, -32      /* SKF_LL_OFF */
-       cmpd    r_off, r_tmp    /* addr < SKF_* */
-       blt     bpf_error       /* cr0 = LT */
-       b       sk_load_byte_negative_offset
-
-_GLOBAL_TOC(sk_load_byte_negative_offset)
-       sk_negative_common(1)
-       lbzx    r_val, 0, r3
-       blr
-
-bpf_error_slow:
-       /* fabricate a cr0 = lt */
-       li      r_tmp, -1
-       cmpdi   r_tmp, 0
-bpf_error:
-       /*
-        * Entered with cr0 = lt
-        * Generated code will 'blt epilogue', returning 0.
-        */
-       li      r_val, 0
-       blr
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 0ef3d95..1bdb1af 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -59,7 +59,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
  *             [       prev sp         ] <-------------
  *             [         ...           ]               |
  * sp (r1) --->        [    stack pointer      ] --------------
- *             [   nv gpr save area    ] 8*8
+ *             [   nv gpr save area    ] 6*8
  *             [    tail_call_cnt      ] 8
  *             [    local_tmp_var      ] 8
  *             [   unused red zone     ] 208 bytes protected
@@ -88,21 +88,6 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
        BUG();
 }
 
-static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
-{
-       /*
-        * Load skb->len and skb->data_len
-        * r3 points to skb
-        */
-       PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len));
-       PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len));
-       /* header_len = len - data_len */
-       PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]);
-
-       /* skb->data pointer */
-       PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data));
-}
-
 static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
        int i;
@@ -145,18 +130,6 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
                if (bpf_is_seen_register(ctx, i))
                        PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 
-       /*
-        * Save additional non-volatile regs if we cache skb
-        * Also, setup skb data
-        */
-       if (ctx->seen & SEEN_SKB) {
-               PPC_BPF_STL(b2p[SKB_HLEN_REG], 1,
-                               bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
-               PPC_BPF_STL(b2p[SKB_DATA_REG], 1,
-                               bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
-               bpf_jit_emit_skb_loads(image, ctx);
-       }
-
        /* Setup frame pointer to point to the bpf stack area */
        if (bpf_is_seen_register(ctx, BPF_REG_FP))
                PPC_ADDI(b2p[BPF_REG_FP], 1,
@@ -172,14 +145,6 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
                if (bpf_is_seen_register(ctx, i))
                        PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 
-       /* Restore non-volatile registers used for skb cache */
-       if (ctx->seen & SEEN_SKB) {
-               PPC_BPF_LL(b2p[SKB_HLEN_REG], 1,
-                               bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
-               PPC_BPF_LL(b2p[SKB_DATA_REG], 1,
-                               bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
-       }
-
        /* Tear down our stack frame */
        if (bpf_has_stack_frame(ctx)) {
                PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
@@ -753,23 +718,10 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
                        ctx->seen |= SEEN_FUNC;
                        func = (u8 *) __bpf_call_base + imm;
 
-                       /* Save skb pointer if we need to re-cache skb data */
-                       if ((ctx->seen & SEEN_SKB) &&
-                           bpf_helper_changes_pkt_data(func))
-                               PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
-
                        bpf_jit_emit_func_call(image, ctx, (u64)func);
 
                        /* move return value from r3 to BPF_REG_0 */
                        PPC_MR(b2p[BPF_REG_0], 3);
-
-                       /* refresh skb cache */
-                       if ((ctx->seen & SEEN_SKB) &&
-                           bpf_helper_changes_pkt_data(func)) {
-                               /* reload skb pointer to r3 */
-                               PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
-                               bpf_jit_emit_skb_loads(image, ctx);
-                       }
                        break;
 
                /*
@@ -887,65 +839,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
                        break;
 
                /*
-                * Loads from packet header/data
-                * Assume 32-bit input value in imm and X (src_reg)
-                */
-
-               /* Absolute loads */
-               case BPF_LD | BPF_W | BPF_ABS:
-                       func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word);
-                       goto common_load_abs;
-               case BPF_LD | BPF_H | BPF_ABS:
-                       func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half);
-                       goto common_load_abs;
-               case BPF_LD | BPF_B | BPF_ABS:
-                       func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte);
-common_load_abs:
-                       /*
-                        * Load from [imm]
-                        * Load into r4, which can just be passed onto
-                        *  skb load helpers as the second parameter
-                        */
-                       PPC_LI32(4, imm);
-                       goto common_load;
-
-               /* Indirect loads */
-               case BPF_LD | BPF_W | BPF_IND:
-                       func = (u8 *)sk_load_word;
-                       goto common_load_ind;
-               case BPF_LD | BPF_H | BPF_IND:
-                       func = (u8 *)sk_load_half;
-                       goto common_load_ind;
-               case BPF_LD | BPF_B | BPF_IND:
-                       func = (u8 *)sk_load_byte;
-common_load_ind:
-                       /*
-                        * Load from [src_reg + imm]
-                        * Treat src_reg as a 32-bit value
-                        */
-                       PPC_EXTSW(4, src_reg);
-                       if (imm) {
-                               if (imm >= -32768 && imm < 32768)
-                                       PPC_ADDI(4, 4, IMM_L(imm));
-                               else {
-                                       PPC_LI32(b2p[TMP_REG_1], imm);
-                                       PPC_ADD(4, 4, b2p[TMP_REG_1]);
-                               }
-                       }
-
-common_load:
-                       ctx->seen |= SEEN_SKB;
-                       ctx->seen |= SEEN_FUNC;
-                       bpf_jit_emit_func_call(image, ctx, (u64)func);
-
-                       /*
-                        * Helper returns 'lt' condition on error, and an
-                        * appropriate return value in BPF_REG_0
-                        */
-                       PPC_BCC(COND_LT, exit_addr);
-                       break;
-
-               /*
                 * Tail call
                 */
                case BPF_JMP | BPF_TAIL_CALL:
-- 
2.9.5

Reply via email to