From: Jiong Wang <jiong.w...@netronome.com>

As we are doing JIT, we would want to use the advanced version of the
reciprocal divide (reciprocal_value_adv) to trade performance with host.

We could reduce the required ALU instructions from 4 to 2 or 1.

Signed-off-by: Jiong Wang <jiong.w...@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicin...@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 38 ++++++++++++++-----
 .../net/ethernet/netronome/nfp/bpf/verifier.c | 16 ++++++--
 2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c 
b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index d732b6cfc356..f99ac00bd649 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1498,8 +1498,9 @@ static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, 
u64 imm)
 {
        swreg tmp_both = imm_both(nfp_prog), dst_both = reg_both(dst);
        swreg dst_a = reg_a(dst), dst_b = reg_a(dst);
-       struct reciprocal_value rvalue;
+       struct reciprocal_value_adv rvalue;
        swreg tmp_b = imm_b(nfp_prog);
+       u8 pre_shift, exp;
        swreg magic;
 
        if (imm > U32_MAX) {
@@ -1507,15 +1508,34 @@ static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 
dst, u64 imm)
                return 0;
        }
 
-       rvalue = reciprocal_value(imm);
+       rvalue = reciprocal_value_adv(imm, 32);
+       exp = rvalue.exp;
+       if (rvalue.is_wide_m && !(imm & 1)) {
+               pre_shift = fls(imm & -imm) - 1;
+               rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift);
+       } else {
+               pre_shift = 0;
+       }
        magic = re_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
-       wrp_mul_u32(nfp_prog, tmp_both, tmp_both, dst_a, magic, true);
-       emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, tmp_b);
-       emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
-                SHF_SC_R_SHF, rvalue.sh1);
-       emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, tmp_b);
-       emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
-                SHF_SC_R_SHF, rvalue.sh2);
+       if (imm == 1 << exp) {
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+                        SHF_SC_R_SHF, exp);
+       } else if (rvalue.is_wide_m) {
+               wrp_mul_u32(nfp_prog, tmp_both, tmp_both, dst_a, magic, true);
+               emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, tmp_b);
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+                        SHF_SC_R_SHF, 1);
+               emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, tmp_b);
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+                        SHF_SC_R_SHF, rvalue.sh - 1);
+       } else {
+               if (pre_shift)
+                       emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+                                dst_b, SHF_SC_R_SHF, pre_shift);
+               wrp_mul_u32(nfp_prog, dst_both, dst_both, dst_a, magic, true);
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+                        dst_b, SHF_SC_R_SHF, rvalue.sh);
+       }
 
        return 0;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c 
b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index f0f07e988c46..39c2c24fea11 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -561,12 +561,22 @@ nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct 
nfp_insn_meta *meta,
        /* NFP doesn't have divide instructions, we support divide by constant
         * through reciprocal multiplication. Given NFP support multiplication
         * no bigger than u32, we'd require divisor and dividend no bigger than
-        * that as well.
+        * that as well. There is a further range requirement on dividend,
+        * please see the NOTE below.
         *
         * Also eBPF doesn't support signed divide and has enforced this on C
         * language level by failing compilation. However LLVM assembler hasn't
         * enforced this, so it is possible for negative constant to leak in as
         * a BPF_K operand through assembly code, we reject such cases as well.
+        *
+        * NOTE: because we are using "reciprocal_value_adv" which doesn't
+        * support dividend with MSB set, so we need to JIT separate NFP
+        * sequence to handle such case. It could be a simple sequence if there
+        * is conditional move, however there isn't for NFP. So, we don't bother
+        * generating compare-if-set-branch sequence by rejecting the program
+        * straight away when the u32 dividend has MSB set. Divide by such a
+        * large constant would be rare in practice. Also, the programmer could
+        * simply rewrite it as "result = divisor >= the_const".
         */
        if (is_mbpf_div(meta)) {
                if (meta->umax_dst > U32_MAX) {
@@ -578,8 +588,8 @@ nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct 
nfp_insn_meta *meta,
                                pr_vlog(env, "dividend is not constant\n");
                                return -EINVAL;
                        }
-                       if (meta->umax_src > U32_MAX) {
-                               pr_vlog(env, "dividend is not within u32 value 
range\n");
+                       if (meta->umax_src > U32_MAX / 2) {
+                               pr_vlog(env, "dividend is bigger than 
U32_MAX/2\n");
                                return -EINVAL;
                        }
                }
-- 
2.17.1

Reply via email to