Since LD_ABS/LD_IND instructions are now removed from the core and reimplemented through a combination of inlined BPF instructions and a slow-path helper, we can get rid of the complexity from ppc64 JIT.
Signed-off-by: Daniel Borkmann <dan...@iogearbox.net> Acked-by: Naveen N. Rao <naveen.n....@linux.vnet.ibm.com> Acked-by: Alexei Starovoitov <a...@kernel.org> Tested-by: Sandipan Das <sandi...@linux.vnet.ibm.com> --- arch/powerpc/net/Makefile | 2 +- arch/powerpc/net/bpf_jit64.h | 37 ++------ arch/powerpc/net/bpf_jit_asm64.S | 180 -------------------------------------- arch/powerpc/net/bpf_jit_comp64.c | 109 +---------------------- 4 files changed, 11 insertions(+), 317 deletions(-) delete mode 100644 arch/powerpc/net/bpf_jit_asm64.S diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index 02d369c..809f019 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile @@ -3,7 +3,7 @@ # Arch-specific network modules # ifeq ($(CONFIG_PPC64),y) -obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o else obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o endif diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index 8bdef7e..3609be4 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -20,7 +20,7 @@ * with our redzone usage. * * [ prev sp ] <------------- - * [ nv gpr save area ] 8*8 | + * [ nv gpr save area ] 6*8 | * [ tail_call_cnt ] 8 | * [ local_tmp_var ] 8 | * fp (r31) --> [ ebpf stack space ] upto 512 | @@ -28,8 +28,8 @@ * sp (r1) ---> [ stack pointer ] -------------- */ -/* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */ -#define BPF_PPC_STACK_SAVE (8*8) +/* for gpr non volatile registers BPG_REG_6 to 10 */ +#define BPF_PPC_STACK_SAVE (6*8) /* for bpf JIT code internal usage */ #define BPF_PPC_STACK_LOCALS 16 /* stack frame excluding BPF stack, ensure this is quadword aligned */ @@ -39,10 +39,8 @@ #ifndef __ASSEMBLY__ /* BPF register usage */ -#define SKB_HLEN_REG (MAX_BPF_JIT_REG + 0) -#define SKB_DATA_REG (MAX_BPF_JIT_REG + 1) -#define TMP_REG_1 (MAX_BPF_JIT_REG + 2) -#define TMP_REG_2 (MAX_BPF_JIT_REG + 3) +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* BPF to ppc register mappings */ static const int b2p[] = { @@ -63,40 +61,23 @@ static const int b2p[] = { [BPF_REG_FP] = 31, /* eBPF jit internal registers */ [BPF_REG_AX] = 2, - [SKB_HLEN_REG] = 25, - [SKB_DATA_REG] = 26, [TMP_REG_1] = 9, [TMP_REG_2] = 10 }; -/* PPC NVR range -- update this if we ever use NVRs below r24 */ -#define BPF_PPC_NVR_MIN 24 - -/* Assembly helpers */ -#define DECLARE_LOAD_FUNC(func) u64 func(u64 r3, u64 r4); \ - u64 func##_negative_offset(u64 r3, u64 r4); \ - u64 func##_positive_offset(u64 r3, u64 r4); - -DECLARE_LOAD_FUNC(sk_load_word); -DECLARE_LOAD_FUNC(sk_load_half); -DECLARE_LOAD_FUNC(sk_load_byte); - -#define CHOOSE_LOAD_FUNC(imm, func) \ - (imm < 0 ? \ - (imm >= SKF_LL_OFF ? func##_negative_offset : func) : \ - func##_positive_offset) +/* PPC NVR range -- update this if we ever use NVRs below r27 */ +#define BPF_PPC_NVR_MIN 27 #define SEEN_FUNC 0x1000 /* might call external helpers */ #define SEEN_STACK 0x2000 /* uses BPF stack */ -#define SEEN_SKB 0x4000 /* uses sk_buff */ -#define SEEN_TAILCALL 0x8000 /* uses tail calls */ +#define SEEN_TAILCALL 0x4000 /* uses tail calls */ struct codegen_context { /* * This is used to track register usage as well * as calls to external helpers. * - register usage is tracked with corresponding - * bits (r3-r10 and r25-r31) + * bits (r3-r10 and r27-r31) * - rest of the bits can be used to track other * things -- for now, we use bits 16 to 23 * encoded in SEEN_* macros above diff --git a/arch/powerpc/net/bpf_jit_asm64.S b/arch/powerpc/net/bpf_jit_asm64.S deleted file mode 100644 index 7e4c514..0000000 --- a/arch/powerpc/net/bpf_jit_asm64.S +++ /dev/null @@ -1,180 +0,0 @@ -/* - * bpf_jit_asm64.S: Packet/header access helper functions - * for PPC64 BPF compiler. - * - * Copyright 2016, Naveen N. Rao <naveen.n....@linux.vnet.ibm.com> - * IBM Corporation - * - * Based on bpf_jit_asm.S by Matt Evans - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include <asm/ppc_asm.h> -#include <asm/ptrace.h> -#include "bpf_jit64.h" - -/* - * All of these routines are called directly from generated code, - * with the below register usage: - * r27 skb pointer (ctx) - * r25 skb header length - * r26 skb->data pointer - * r4 offset - * - * Result is passed back in: - * r8 data read in host endian format (accumulator) - * - * r9 is used as a temporary register - */ - -#define r_skb r27 -#define r_hlen r25 -#define r_data r26 -#define r_off r4 -#define r_val r8 -#define r_tmp r9 - -_GLOBAL_TOC(sk_load_word) - cmpdi r_off, 0 - blt bpf_slow_path_word_neg - b sk_load_word_positive_offset - -_GLOBAL_TOC(sk_load_word_positive_offset) - /* Are we accessing past headlen? */ - subi r_tmp, r_hlen, 4 - cmpd r_tmp, r_off - blt bpf_slow_path_word - /* Nope, just hitting the header. cr0 here is eq or gt! */ - LWZX_BE r_val, r_data, r_off - blr /* Return success, cr0 != LT */ - -_GLOBAL_TOC(sk_load_half) - cmpdi r_off, 0 - blt bpf_slow_path_half_neg - b sk_load_half_positive_offset - -_GLOBAL_TOC(sk_load_half_positive_offset) - subi r_tmp, r_hlen, 2 - cmpd r_tmp, r_off - blt bpf_slow_path_half - LHZX_BE r_val, r_data, r_off - blr - -_GLOBAL_TOC(sk_load_byte) - cmpdi r_off, 0 - blt bpf_slow_path_byte_neg - b sk_load_byte_positive_offset - -_GLOBAL_TOC(sk_load_byte_positive_offset) - cmpd r_hlen, r_off - ble bpf_slow_path_byte - lbzx r_val, r_data, r_off - blr - -/* - * Call out to skb_copy_bits: - * Allocate a new stack frame here to remain ABI-compliant in - * stashing LR. - */ -#define bpf_slow_path_common(SIZE) \ - mflr r0; \ - std r0, PPC_LR_STKOFF(r1); \ - stdu r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1); \ - mr r3, r_skb; \ - /* r4 = r_off as passed */ \ - addi r5, r1, STACK_FRAME_MIN_SIZE; \ - li r6, SIZE; \ - bl skb_copy_bits; \ - nop; \ - /* save r5 */ \ - addi r5, r1, STACK_FRAME_MIN_SIZE; \ - /* r3 = 0 on success */ \ - addi r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS; \ - ld r0, PPC_LR_STKOFF(r1); \ - mtlr r0; \ - cmpdi r3, 0; \ - blt bpf_error; /* cr0 = LT */ - -bpf_slow_path_word: - bpf_slow_path_common(4) - /* Data value is on stack, and cr0 != LT */ - LWZX_BE r_val, 0, r5 - blr - -bpf_slow_path_half: - bpf_slow_path_common(2) - LHZX_BE r_val, 0, r5 - blr - -bpf_slow_path_byte: - bpf_slow_path_common(1) - lbzx r_val, 0, r5 - blr - -/* - * Call out to bpf_internal_load_pointer_neg_helper - */ -#define sk_negative_common(SIZE) \ - mflr r0; \ - std r0, PPC_LR_STKOFF(r1); \ - stdu r1, -STACK_FRAME_MIN_SIZE(r1); \ - mr r3, r_skb; \ - /* r4 = r_off, as passed */ \ - li r5, SIZE; \ - bl bpf_internal_load_pointer_neg_helper; \ - nop; \ - addi r1, r1, STACK_FRAME_MIN_SIZE; \ - ld r0, PPC_LR_STKOFF(r1); \ - mtlr r0; \ - /* R3 != 0 on success */ \ - cmpldi r3, 0; \ - beq bpf_error_slow; /* cr0 = EQ */ - -bpf_slow_path_word_neg: - lis r_tmp, -32 /* SKF_LL_OFF */ - cmpd r_off, r_tmp /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - b sk_load_word_negative_offset - -_GLOBAL_TOC(sk_load_word_negative_offset) - sk_negative_common(4) - LWZX_BE r_val, 0, r3 - blr - -bpf_slow_path_half_neg: - lis r_tmp, -32 /* SKF_LL_OFF */ - cmpd r_off, r_tmp /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - b sk_load_half_negative_offset - -_GLOBAL_TOC(sk_load_half_negative_offset) - sk_negative_common(2) - LHZX_BE r_val, 0, r3 - blr - -bpf_slow_path_byte_neg: - lis r_tmp, -32 /* SKF_LL_OFF */ - cmpd r_off, r_tmp /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - b sk_load_byte_negative_offset - -_GLOBAL_TOC(sk_load_byte_negative_offset) - sk_negative_common(1) - lbzx r_val, 0, r3 - blr - -bpf_error_slow: - /* fabricate a cr0 = lt */ - li r_tmp, -1 - cmpdi r_tmp, 0 -bpf_error: - /* - * Entered with cr0 = lt - * Generated code will 'blt epilogue', returning 0. - */ - li r_val, 0 - blr diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 0ef3d95..1bdb1af 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -59,7 +59,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ prev sp ] <------------- * [ ... ] | * sp (r1) ---> [ stack pointer ] -------------- - * [ nv gpr save area ] 8*8 + * [ nv gpr save area ] 6*8 * [ tail_call_cnt ] 8 * [ local_tmp_var ] 8 * [ unused red zone ] 208 bytes protected @@ -88,21 +88,6 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) BUG(); } -static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx) -{ - /* - * Load skb->len and skb->data_len - * r3 points to skb - */ - PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len)); - PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len)); - /* header_len = len - data_len */ - PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]); - - /* skb->data pointer */ - PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data)); -} - static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; @@ -145,18 +130,6 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) if (bpf_is_seen_register(ctx, i)) PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); - /* - * Save additional non-volatile regs if we cache skb - * Also, setup skb data - */ - if (ctx->seen & SEEN_SKB) { - PPC_BPF_STL(b2p[SKB_HLEN_REG], 1, - bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG])); - PPC_BPF_STL(b2p[SKB_DATA_REG], 1, - bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG])); - bpf_jit_emit_skb_loads(image, ctx); - } - /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, BPF_REG_FP)) PPC_ADDI(b2p[BPF_REG_FP], 1, @@ -172,14 +145,6 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx if (bpf_is_seen_register(ctx, i)) PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); - /* Restore non-volatile registers used for skb cache */ - if (ctx->seen & SEEN_SKB) { - PPC_BPF_LL(b2p[SKB_HLEN_REG], 1, - bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG])); - PPC_BPF_LL(b2p[SKB_DATA_REG], 1, - bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG])); - } - /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size); @@ -753,23 +718,10 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, ctx->seen |= SEEN_FUNC; func = (u8 *) __bpf_call_base + imm; - /* Save skb pointer if we need to re-cache skb data */ - if ((ctx->seen & SEEN_SKB) && - bpf_helper_changes_pkt_data(func)) - PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); - bpf_jit_emit_func_call(image, ctx, (u64)func); /* move return value from r3 to BPF_REG_0 */ PPC_MR(b2p[BPF_REG_0], 3); - - /* refresh skb cache */ - if ((ctx->seen & SEEN_SKB) && - bpf_helper_changes_pkt_data(func)) { - /* reload skb pointer to r3 */ - PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); - bpf_jit_emit_skb_loads(image, ctx); - } break; /* @@ -887,65 +839,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, break; /* - * Loads from packet header/data - * Assume 32-bit input value in imm and X (src_reg) - */ - - /* Absolute loads */ - case BPF_LD | BPF_W | BPF_ABS: - func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word); - goto common_load_abs; - case BPF_LD | BPF_H | BPF_ABS: - func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half); - goto common_load_abs; - case BPF_LD | BPF_B | BPF_ABS: - func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte); -common_load_abs: - /* - * Load from [imm] - * Load into r4, which can just be passed onto - * skb load helpers as the second parameter - */ - PPC_LI32(4, imm); - goto common_load; - - /* Indirect loads */ - case BPF_LD | BPF_W | BPF_IND: - func = (u8 *)sk_load_word; - goto common_load_ind; - case BPF_LD | BPF_H | BPF_IND: - func = (u8 *)sk_load_half; - goto common_load_ind; - case BPF_LD | BPF_B | BPF_IND: - func = (u8 *)sk_load_byte; -common_load_ind: - /* - * Load from [src_reg + imm] - * Treat src_reg as a 32-bit value - */ - PPC_EXTSW(4, src_reg); - if (imm) { - if (imm >= -32768 && imm < 32768) - PPC_ADDI(4, 4, IMM_L(imm)); - else { - PPC_LI32(b2p[TMP_REG_1], imm); - PPC_ADD(4, 4, b2p[TMP_REG_1]); - } - } - -common_load: - ctx->seen |= SEEN_SKB; - ctx->seen |= SEEN_FUNC; - bpf_jit_emit_func_call(image, ctx, (u64)func); - - /* - * Helper returns 'lt' condition on error, and an - * appropriate return value in BPF_REG_0 - */ - PPC_BCC(COND_LT, exit_addr); - break; - - /* * Tail call */ case BPF_JMP | BPF_TAIL_CALL: -- 2.9.5