Simple classic BPF interpreter based off of libpcap.

This is a copy of the BPF interpreter from libpcap which is
modified to handle mbuf metadata. The existing pcap_offline_filter
does not expose a way to match VLAN tags. Copying the BPF interpreter
also means that rte_pdump still does not have a hard dependency
on libpcap.

The API for pdump is versioned because the filter needs to
know both byte code and length of program to validate it.

This patch does cause a small checkpatch warning because
it keeps the original variable names from the pcap code.

Signed-off-by: Stephen Hemminger <[email protected]>
---
 app/pdump/main.c                       |  16 +-
 app/test/test_pdump.c                  |   4 +-
 lib/librte_pdump/Makefile              |   2 +-
 lib/librte_pdump/pdump_bpf.h           | 168 +++++++++
 lib/librte_pdump/rte_pcap_filter.c     | 462 +++++++++++++++++++++++++
 lib/librte_pdump/rte_pdump.c           | 145 ++++++--
 lib/librte_pdump/rte_pdump.h           |  54 ++-
 lib/librte_pdump/rte_pdump_version.map |   7 +
 8 files changed, 806 insertions(+), 52 deletions(-)
 create mode 100644 lib/librte_pdump/pdump_bpf.h
 create mode 100644 lib/librte_pdump/rte_pcap_filter.c

diff --git a/app/pdump/main.c b/app/pdump/main.c
index c1b901279f4b..c3eb554ef28b 100644
--- a/app/pdump/main.c
+++ b/app/pdump/main.c
@@ -828,20 +828,20 @@ enable_pdump(void)
                                                pt->queue,
                                                RTE_PDUMP_FLAG_RX,
                                                pt->rx_ring,
-                                               pt->mp, NULL);
+                                               pt->mp, NULL, 0);
                                ret1 = rte_pdump_enable_by_deviceid(
                                                pt->device_id,
                                                pt->queue,
                                                RTE_PDUMP_FLAG_TX,
                                                pt->tx_ring,
-                                               pt->mp, NULL);
+                                               pt->mp, NULL, 0);
                        } else if (pt->dump_by_type == PORT_ID) {
                                ret = rte_pdump_enable(pt->port, pt->queue,
                                                RTE_PDUMP_FLAG_RX,
-                                               pt->rx_ring, pt->mp, NULL);
+                                               pt->rx_ring, pt->mp, NULL, 0);
                                ret1 = rte_pdump_enable(pt->port, pt->queue,
                                                RTE_PDUMP_FLAG_TX,
-                                               pt->tx_ring, pt->mp, NULL);
+                                               pt->tx_ring, pt->mp, NULL, 0);
                        }
                } else if (pt->dir == RTE_PDUMP_FLAG_RX) {
                        if (pt->dump_by_type == DEVICE_ID)
@@ -849,22 +849,22 @@ enable_pdump(void)
                                                pt->device_id,
                                                pt->queue,
                                                pt->dir, pt->rx_ring,
-                                               pt->mp, NULL);
+                                               pt->mp, NULL, 0);
                        else if (pt->dump_by_type == PORT_ID)
                                ret = rte_pdump_enable(pt->port, pt->queue,
                                                pt->dir,
-                                               pt->rx_ring, pt->mp, NULL);
+                                               pt->rx_ring, pt->mp, NULL, 0);
                } else if (pt->dir == RTE_PDUMP_FLAG_TX) {
                        if (pt->dump_by_type == DEVICE_ID)
                                ret = rte_pdump_enable_by_deviceid(
                                                pt->device_id,
                                                pt->queue,
                                                pt->dir,
-                                               pt->tx_ring, pt->mp, NULL);
+                                               pt->tx_ring, pt->mp, NULL, 0);
                        else if (pt->dump_by_type == PORT_ID)
                                ret = rte_pdump_enable(pt->port, pt->queue,
                                                pt->dir,
-                                               pt->tx_ring, pt->mp, NULL);
+                                               pt->tx_ring, pt->mp, NULL, 0);
                }
                if (ret < 0 || ret1 < 0) {
                        cleanup_pdump_resources();
diff --git a/app/test/test_pdump.c b/app/test/test_pdump.c
index af206968b38d..f0187a4cd279 100644
--- a/app/test/test_pdump.c
+++ b/app/test/test_pdump.c
@@ -79,7 +79,7 @@ run_pdump_client_tests(void)
 
        for (itr = 0; itr < NUM_ITR; itr++) {
                ret = rte_pdump_enable(portid, QUEUE_ID, flags, ring_client,
-                                      mp, NULL);
+                                      mp, NULL, 0);
                if (ret < 0) {
                        printf("rte_pdump_enable failed\n");
                        return -1;
@@ -94,7 +94,7 @@ run_pdump_client_tests(void)
                printf("pdump_disable success\n");
 
                ret = rte_pdump_enable_by_deviceid(deviceid, QUEUE_ID, flags,
-                                                  ring_client, mp, NULL);
+                                                  ring_client, mp, NULL, 0);
                if (ret < 0) {
                        printf("rte_pdump_enable_by_deviceid failed\n");
                        return -1;
diff --git a/lib/librte_pdump/Makefile b/lib/librte_pdump/Makefile
index 89593689a7d5..4a631c06a0ec 100644
--- a/lib/librte_pdump/Makefile
+++ b/lib/librte_pdump/Makefile
@@ -15,7 +15,7 @@ EXPORT_MAP := rte_pdump_version.map
 LIBABIVER := 3
 
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_PDUMP) := rte_pdump.c
+SRCS-$(CONFIG_RTE_LIBRTE_PDUMP) := rte_pdump.c rte_pcap_filter.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_PDUMP)-include := rte_pdump.h
diff --git a/lib/librte_pdump/pdump_bpf.h b/lib/librte_pdump/pdump_bpf.h
new file mode 100644
index 000000000000..8f6d00f3cee2
--- /dev/null
+++ b/lib/librte_pdump/pdump_bpf.h
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the libpcap bpf_filter which
+ * in turn is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ */
+
+#ifndef _PDUMP_BPF_H_
+#define _PDUMP_BPF_H_
+
+/*
+ * This is based off of libpcap's cut-down version of bpf.h;
+ * it includes only  the stuff needed for the BPF interpreter.
+ *
+ * Note: this is the original classic BPF generated by libpcap
+ *  not the new eBPF used elsewhere.
+ */
+
+typedef        int bpf_int32;
+typedef        unsigned int bpf_u_int32;
+
+/*
+ * Alignment macros.  BPF_WORDALIGN rounds x up to the next
+ * multiple of BPF_ALIGNMENT (which must be a power of two).
+ */
+#define BPF_ALIGNMENT sizeof(bpf_int32)
+#define BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1))
+
+/*
+ * Number of scratch memory words (BPF_LD/BPF_LDX|BPF_MEM, BPF_ST, BPF_STX).
+ */
+#define BPF_MEMWORDS 16
+
+/*
+ * The instruction encodings.
+ *
+ * Please inform [email protected] if you use any
+ * of the reserved values, so that we can note that they're used
+ * (and perhaps implement it in the reference BPF implementation
+ * and encourage its implementation elsewhere).
+ */
+
+/*
+ * The upper 8 bits of the opcode aren't used. BSD/OS used 0x8000.
+ */
+
+/* instruction classes: the low 3 bits of the opcode */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define                BPF_LD          0x00    /* load into accumulator A */
+#define                BPF_LDX         0x01    /* load into index register X */
+#define                BPF_ST          0x02    /* store A to scratch memory */
+#define                BPF_STX         0x03    /* store X to scratch memory */
+#define                BPF_ALU         0x04    /* arithmetic/logic on A */
+#define                BPF_JMP         0x05    /* branch */
+#define                BPF_RET         0x06    /* end program with a verdict */
+#define                BPF_MISC        0x07    /* register transfers */
+
+/* ld/ldx fields: access size (mask 0x18) and addressing mode (mask 0xe0) */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define                BPF_W           0x00    /* 32-bit word */
+#define                BPF_H           0x08    /* 16-bit half word */
+#define                BPF_B           0x10    /* byte */
+/*                             0x18    reserved; used by BSD/OS */
+#define BPF_MODE(code) ((code) & 0xe0)
+#define                BPF_IMM         0x00    /* the constant k */
+#define                BPF_ABS         0x20    /* packet data at offset k */
+#define                BPF_IND         0x40    /* packet data at offset X + k */
+#define                BPF_MEM         0x60    /* scratch memory word k */
+#define                BPF_LEN         0x80    /* packet length */
+#define                BPF_MSH         0xa0    /* 4 * (packet byte k & 0xf) */
+/*                             0xc0    reserved; used by BSD/OS */
+/*                             0xe0    reserved; used by BSD/OS */
+
+/* alu/jmp fields: operation (mask 0xf0) and operand source (mask 0x08) */
+#define BPF_OP(code)   ((code) & 0xf0)
+#define                BPF_ADD         0x00
+#define                BPF_SUB         0x10
+#define                BPF_MUL         0x20
+#define                BPF_DIV         0x30
+#define                BPF_OR          0x40
+#define                BPF_AND         0x50
+#define                BPF_LSH         0x60
+#define                BPF_RSH         0x70
+#define                BPF_NEG         0x80
+#define                BPF_MOD         0x90
+#define                BPF_XOR         0xa0
+/*                             0xb0    reserved */
+/*                             0xc0    reserved */
+/*                             0xd0    reserved */
+/*                             0xe0    reserved */
+/*                             0xf0    reserved */
+
+#define                BPF_JA          0x00    /* unconditional, offset in k */
+#define                BPF_JEQ         0x10    /* A == operand */
+#define                BPF_JGT         0x20    /* A > operand */
+#define                BPF_JGE         0x30    /* A >= operand */
+#define                BPF_JSET        0x40    /* (A & operand) != 0 */
+/*                             0x50    reserved; used on BSD/OS */
+/*                             0x60    reserved */
+/*                             0x70    reserved */
+/*                             0x80    reserved */
+/*                             0x90    reserved */
+/*                             0xa0    reserved */
+/*                             0xb0    reserved */
+/*                             0xc0    reserved */
+/*                             0xd0    reserved */
+/*                             0xe0    reserved */
+/*                             0xf0    reserved */
+#define BPF_SRC(code)  ((code) & 0x08)
+#define                BPF_K           0x00    /* operand is the constant k */
+#define                BPF_X           0x08    /* operand is register X */
+
+/* ret: source of the return value (mask 0x18) - BPF_K and BPF_X also apply */
+#define BPF_RVAL(code) ((code) & 0x18)
+#define                BPF_A           0x10    /* return accumulator A */
+/*                             0x18    reserved */
+
+/* misc: register transfer operations (mask 0xf8) */
+#define BPF_MISCOP(code) ((code) & 0xf8)
+#define                BPF_TAX         0x00    /* X = A */
+/*                             0x08    reserved */
+/*                             0x10    reserved */
+/*                             0x18    reserved */
+/* #define     BPF_COP         0x20    NetBSD "coprocessor" extensions */
+/*                             0x28    reserved */
+/*                             0x30    reserved */
+/*                             0x38    reserved */
+/* #define     BPF_COPX        0x40    NetBSD "coprocessor" extensions */
+/*                                     also used on BSD/OS */
+/*                             0x48    reserved */
+/*                             0x50    reserved */
+/*                             0x58    reserved */
+/*                             0x60    reserved */
+/*                             0x68    reserved */
+/*                             0x70    reserved */
+/*                             0x78    reserved */
+#define                BPF_TXA         0x80    /* A = X */
+/*                             0x88    reserved */
+/*                             0x90    reserved */
+/*                             0x98    reserved */
+/*                             0xa0    reserved */
+/*                             0xa8    reserved */
+/*                             0xb0    reserved */
+/*                             0xb8    reserved */
+/*                             0xc0    reserved; used on BSD/OS */
+/*                             0xc8    reserved */
+/*                             0xd0    reserved */
+/*                             0xd8    reserved */
+/*                             0xe0    reserved */
+/*                             0xe8    reserved */
+/*                             0xf0    reserved */
+/*                             0xf8    reserved */
+
+/*
+ * One classic BPF instruction, with the same members as libpcap's.
+ */
+struct bpf_insn {
+       unsigned short code;    /* opcode, see the BPF_* fields above */
+       unsigned char  jt;      /* conditional jump offset when true */
+       unsigned char  jf;      /* conditional jump offset when false */
+       bpf_u_int32 k;          /* constant, offset or memory index */
+};
+
+#endif /* _PDUMP_BPF_H_ */
diff --git a/lib/librte_pdump/rte_pcap_filter.c b/lib/librte_pdump/rte_pcap_filter.c
new file mode 100644
index 000000000000..1d8caeee6628
--- /dev/null
+++ b/lib/librte_pdump/rte_pcap_filter.c
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the libpcap bpf_filter which
+ * in turn is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_pdump.h>
+
+#include "pdump_bpf.h"
+
+/* Linux ancillary loads: offset SKF_AD_OFF + n selects metadata, not data */
+#define SKF_AD_OFF    (-0x1000)
+#define SKF_AD_VLAN_TAG        44
+#define SKF_AD_VLAN_TAG_PRESENT 48
+
+#define EXTRACT32(p) rte_be_to_cpu_32(*(const unaligned_uint32_t *)(p))
+#define EXTRACT16(p) rte_be_to_cpu_16(*(const unaligned_uint16_t *)(p))
+/* 1 if the mbuf has PKT_RX_VLAN_STRIPPED or PKT_TX_VLAN set, else 0 */
+static inline u_short vlan_present(const struct rte_mbuf *m)
+{
+       return !!(m->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_TX_VLAN));
+}
+
+/*
+ * Run a classic BPF program on mbuf m and return its verdict: 0 means
+ * the packet does not match, any other value means it matches.
+ * filter is an array of struct bpf_insn that is expected to have passed
+ * rte_pcap_validate_filter(); a NULL filter matches everything (-1).
+ * Packet loads are bounds-checked against the data length of the first
+ * segment and end the program with 0 when out of range; BPF_LEN loads
+ * give the total packet length.  Byte loads at the Linux ancillary
+ * offsets SKF_AD_VLAN_TAG and SKF_AD_VLAN_TAG_PRESENT read m->vlan_tci
+ * and the VLAN offload flags of m->ol_flags instead of packet bytes.
+ * Thanks to Ani Sinha <[email protected]> for providing initial implementation
+ */
+int
+rte_pcap_filter(const void *filter, const struct rte_mbuf *m)
+{
+       const struct bpf_insn *pc = filter;
+       uint32_t buflen = rte_pktmbuf_data_len(m);
+       uint32_t wirelen = rte_pktmbuf_pkt_len(m);
+       const uint8_t *p = rte_pktmbuf_mtod(m, const uint8_t *);
+       uint32_t A = 0, X = 0;
+       bpf_u_int32 k;
+       /* Zeroed: validation does not prove a store precedes each load. */
+       uint32_t mem[BPF_MEMWORDS] = { 0 };
+
+       /* No filter means accept all. */
+       if (pc == NULL)
+               return -1;
+
+       /*
+        * Every case either returns or ends in "continue", so the loop
+        * increment steps to the next instruction after any jump offset.
+        */
+       for (;; ++pc) {
+               switch (pc->code) {
+               default:
+                       /* this must be caught by validation */
+                       rte_panic("invalid BPF opcode\n");
+                       return 0;
+
+               case BPF_RET|BPF_K:
+                       return pc->k;
+
+               case BPF_RET|BPF_A:
+                       return A;
+
+               case BPF_LD|BPF_W|BPF_ABS:
+                       k = pc->k;
+                       if (k > buflen || sizeof(int32_t) > buflen - k)
+                               return 0;
+
+                       A = EXTRACT32(&p[k]);
+                       continue;
+
+               case BPF_LD|BPF_H|BPF_ABS:
+                       k = pc->k;
+                       if (k > buflen || sizeof(int16_t) > buflen - k)
+                               return 0;
+
+                       A = EXTRACT16(&p[k]);
+                       continue;
+
+               case BPF_LD|BPF_B|BPF_ABS:
+                       switch (pc->k) {
+                       case SKF_AD_OFF + SKF_AD_VLAN_TAG:
+                               A = m->vlan_tci;
+                               break;
+                       case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
+                               A = vlan_present(m);
+                               break;
+                       default:
+                               k = pc->k;
+                               if (k >= buflen)
+                                       return 0;
+
+                               A = p[k];
+                               break;
+                       }
+                       continue;
+
+               case BPF_LD|BPF_W|BPF_LEN:
+                       A = wirelen;
+                       continue;
+
+               case BPF_LDX|BPF_W|BPF_LEN:
+                       X = wirelen;
+                       continue;
+
+               case BPF_LD|BPF_W|BPF_IND:
+                       k = X + pc->k;
+                       if (pc->k > buflen || X > buflen - pc->k ||
+                           sizeof(int32_t) > buflen - k) {
+                               return 0;
+                       }
+                       A = EXTRACT32(&p[k]);
+                       continue;
+
+               case BPF_LD|BPF_H|BPF_IND:
+                       k = X + pc->k;
+                       if (X > buflen ||
+                           pc->k > buflen - X ||
+                           sizeof(int16_t) > buflen - k)
+                               return 0;
+
+                       A = EXTRACT16(&p[k]);
+                       continue;
+
+               case BPF_LD|BPF_B|BPF_IND:
+                       k = X + pc->k;
+                       if (pc->k >= buflen || X >= buflen - pc->k)
+                               return 0;
+
+                       A = p[k];
+                       continue;
+
+               case BPF_LDX|BPF_MSH|BPF_B:
+                       k = pc->k;
+                       if (k >= buflen)
+                               return 0;
+
+                       X = (p[pc->k] & 0xf) << 2;
+                       continue;
+
+               case BPF_LD|BPF_IMM:
+                       A = pc->k;
+                       continue;
+
+               case BPF_LDX|BPF_IMM:
+                       X = pc->k;
+                       continue;
+
+               case BPF_LD|BPF_MEM:
+                       A = mem[pc->k];
+                       continue;
+
+               case BPF_LDX|BPF_MEM:
+                       X = mem[pc->k];
+                       continue;
+
+               case BPF_ST:
+                       mem[pc->k] = A;
+                       continue;
+
+               case BPF_STX:
+                       mem[pc->k] = X;
+                       continue;
+
+               case BPF_JMP|BPF_JA:
+                       /*
+                        * XXX - we currently implement "ip6 protochain"
+                        * with backward jumps, so sign-extend pc->k.
+                        */
+                       pc += (bpf_int32)pc->k;
+                       continue;
+
+               case BPF_JMP|BPF_JGT|BPF_K:
+                       pc += (pc->k < A) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JGE|BPF_K:
+                       pc += (pc->k <= A) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JEQ|BPF_K:
+                       pc += (pc->k == A) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JSET|BPF_K:
+                       pc += (A & pc->k) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JGT|BPF_X:
+                       pc += (A > X) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JGE|BPF_X:
+                       pc += (A >= X) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JEQ|BPF_X:
+                       pc += (A == X) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JSET|BPF_X:
+                       pc += (A & X) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_ALU|BPF_ADD|BPF_X:
+                       A += X;
+                       continue;
+
+               case BPF_ALU|BPF_SUB|BPF_X:
+                       A -= X;
+                       continue;
+
+               case BPF_ALU|BPF_MUL|BPF_X:
+                       A *= X;
+                       continue;
+
+               case BPF_ALU|BPF_DIV|BPF_X:
+                       if (X == 0)
+                               return 0;
+                       A /= X;
+                       continue;
+
+               case BPF_ALU|BPF_MOD|BPF_X:
+                       if (X == 0)
+                               return 0;
+                       A %= X;
+                       continue;
+
+               case BPF_ALU|BPF_AND|BPF_X:
+                       A &= X;
+                       continue;
+
+               case BPF_ALU|BPF_OR|BPF_X:
+                       A |= X;
+                       continue;
+
+               case BPF_ALU|BPF_XOR|BPF_X:
+                       A ^= X;
+                       continue;
+
+               case BPF_ALU|BPF_LSH|BPF_X:
+                       if (X < 32)
+                               A <<= X;
+                       else
+                               A = 0;
+                       continue;
+
+               case BPF_ALU|BPF_RSH|BPF_X:
+                       if (X < 32)
+                               A >>= X;
+                       else
+                               A = 0;
+                       continue;
+
+               case BPF_ALU|BPF_ADD|BPF_K:
+                       A += pc->k;
+                       continue;
+
+               case BPF_ALU|BPF_SUB|BPF_K:
+                       A -= pc->k;
+                       continue;
+
+               case BPF_ALU|BPF_MUL|BPF_K:
+                       A *= pc->k;
+                       continue;
+
+               case BPF_ALU|BPF_DIV|BPF_K:
+                       A /= pc->k;     /* k != 0: checked by the validator */
+                       continue;
+
+               case BPF_ALU|BPF_MOD|BPF_K:
+                       A %= pc->k;     /* k != 0: checked by the validator */
+                       continue;
+
+               case BPF_ALU|BPF_AND|BPF_K:
+                       A &= pc->k;
+                       continue;
+
+               case BPF_ALU|BPF_OR|BPF_K:
+                       A |= pc->k;
+                       continue;
+
+               case BPF_ALU|BPF_XOR|BPF_K:
+                       A ^= pc->k;
+                       continue;
+
+               case BPF_ALU|BPF_LSH|BPF_K:
+                       A = (pc->k < 32) ? A << pc->k : 0; /* >= 32 is UB */
+                       continue;
+
+               case BPF_ALU|BPF_RSH|BPF_K:
+                       A = (pc->k < 32) ? A >> pc->k : 0; /* >= 32 is UB */
+                       continue;
+
+               case BPF_ALU|BPF_NEG:
+                       /*
+                        * Most BPF arithmetic is unsigned, but negation
+                        * can't be unsigned; respecify it as subtracting
+                        * the accumulator from 0U, so that 1) we don't
+                        * get compiler warnings about negating an unsigned
+                        * value and 2) don't get UBSan warnings about
+                        * the result of negating 0x80000000 being undefined.
+                        */
+                       A = (0U - A);
+                       continue;
+
+               case BPF_MISC|BPF_TAX:
+                       X = A;
+                       continue;
+
+               case BPF_MISC|BPF_TXA:
+                       A = X;
+                       continue;
+               }
+       }
+}
+
+/*
+ * Return non-zero if the 'len' instructions at 'filter' pass these
+ * static checks: scratch-memory indexes are below BPF_MEMWORDS,
+ * constant divisors are non-zero, constant shift counts are below 32,
+ * jump targets lie inside the program, RET and MISC opcodes are ones
+ * rte_pcap_filter() implements, and the last instruction is a RET.
+ * Packet loads are range-checked at run time.  Returns 0 otherwise.
+ */
+int
+rte_pcap_validate_filter(const void *filter, uint32_t len)
+{
+       const struct bpf_insn *f = filter;
+       unsigned int i, from;
+
+       if (f == NULL || len < 1)
+               return 0;
+
+       for (i = 0; i < len; ++i) {
+               const struct bpf_insn *p = &f[i];
+
+               switch (BPF_CLASS(p->code)) {
+               /*
+                * Check that memory operations use valid addresses.
+                */
+               case BPF_LD:
+               case BPF_LDX:
+                       switch (BPF_MODE(p->code)) {
+                       case BPF_IMM:
+                               break;
+                       case BPF_ABS:
+                       case BPF_IND:
+                       case BPF_MSH:
+                               /*
+                                * There's no maximum packet data size
+                                * in userland.  The runtime packet length
+                                * check suffices.
+                                */
+                               break;
+                       case BPF_MEM:
+                               if (p->k >= BPF_MEMWORDS)
+                                       return 0;
+                               break;
+                       case BPF_LEN:
+                               break;
+                       default:
+                               return 0;
+                       }
+                       break;
+               case BPF_ST:
+               case BPF_STX:
+                       if (p->k >= BPF_MEMWORDS)
+                               return 0;
+                       break;
+               case BPF_ALU:
+                       switch (BPF_OP(p->code)) {
+                       case BPF_ADD:
+                       case BPF_SUB:
+                       case BPF_MUL:
+                       case BPF_OR:
+                       case BPF_AND:
+                       case BPF_XOR:
+                       case BPF_NEG:
+                               break;
+                       case BPF_LSH:
+                       case BPF_RSH:
+                               /*
+                                * Shifting the 32-bit accumulator by a
+                                * constant above 31 is undefined in C.
+                                */
+                               if (BPF_SRC(p->code) == BPF_K && p->k > 31)
+                                       return 0;
+                               break;
+                       case BPF_DIV:
+                       case BPF_MOD:
+                               /*
+                                * Check for constant division or modulus
+                                * by 0.
+                                */
+                               if (BPF_SRC(p->code) == BPF_K && p->k == 0)
+                                       return 0;
+                               break;
+                       default:
+                               return 0;
+                       }
+                       break;
+               case BPF_JMP:
+                       /*
+                        * Offsets are relative to the next instruction.
+                        * Conditional jumps carry 8-bit offsets and so
+                        * only go forward.  BPF_JA carries a 32-bit
+                        * offset that the interpreter sign-extends; the
+                        * unsigned sum below wraps, so a backward jump
+                        * landing inside the program is accepted as well
+                        * (libpcap's "ip6 protochain" needs that), and
+                        * loop termination is not checked here.
+                        */
+                       from = i + 1;
+                       switch (BPF_OP(p->code)) {
+                       case BPF_JA:
+                               if (from + p->k >= (unsigned int)len)
+                                       return 0;
+                               break;
+                       case BPF_JEQ:
+                       case BPF_JGT:
+                       case BPF_JGE:
+                       case BPF_JSET:
+                               if (from + p->jt >= (unsigned int)len ||
+                                   from + p->jf >= (unsigned int)len)
+                                       return 0;
+                               break;
+                       default:
+                               return 0;
+                       }
+                       break;
+               case BPF_RET:
+                       /* The interpreter only has RET|K and RET|A. */
+                       if (p->code != (BPF_RET|BPF_K) &&
+                           p->code != (BPF_RET|BPF_A))
+                               return 0;
+                       break;
+               case BPF_MISC:
+                       /* The interpreter only has TAX and TXA. */
+                       if (p->code != (BPF_MISC|BPF_TAX) &&
+                           p->code != (BPF_MISC|BPF_TXA))
+                               return 0;
+                       break;
+               default:
+                       return 0;
+               }
+       }
+
+       /* The final instruction must be a return. */
+       return BPF_CLASS(f[len - 1].code) == BPF_RET;
+}
diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index 41f2ec17a26b..1206671c6f60 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -8,11 +8,13 @@
 #include <rte_lcore.h>
 #include <rte_log.h>
 #include <rte_errno.h>
+#include <rte_malloc.h>
 #include <rte_string_fns.h>
 
 #include "rte_pdump.h"
 
 #define DEVICE_ID_SIZE 64
+#define BPF_INS_SIZE sizeof(uint64_t)
 
 /* Macro for printing using RTE_LOG */
 static int pdump_logtype;
@@ -32,6 +34,8 @@ enum pdump_version {
        V1 = 1
 };
 
+#define PDUMP_FILTER_V1        0x7064756d7066696c
+
 struct pdump_request {
        uint16_t ver;
        uint16_t op;
@@ -42,14 +46,14 @@ struct pdump_request {
                        uint16_t queue;
                        struct rte_ring *ring;
                        struct rte_mempool *mp;
-                       void *filter;
+                       const void *filter;
                } en_v1;
                struct disable_v1 {
                        char device[DEVICE_ID_SIZE];
                        uint16_t queue;
                        struct rte_ring *ring;
                        struct rte_mempool *mp;
-                       void *filter;
+                       const void *filter;
                } dis_v1;
        } data;
 };
@@ -64,7 +68,7 @@ static struct pdump_rxtx_cbs {
        struct rte_ring *ring;
        struct rte_mempool *mp;
        const struct rte_eth_rxtx_callback *cb;
-       void *filter;
+       const void *filter;
 } rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT],
 tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
 
@@ -86,6 +90,9 @@ pdump_copy(uint16_t port, struct rte_mbuf **pkts,
        ring = cbs->ring;
        mp = cbs->mp;
        for (i = 0; i < nb_pkts; i++) {
+               if (rte_pcap_filter(cbs->filter, pkts[i]) == 0)
+                       continue;
+
                p = rte_pktmbuf_copy(pkts[i], mp, 0, UINT32_MAX);
                if (p) {
                        p->port = port;
@@ -124,8 +131,8 @@ pdump_tx(uint16_t port, uint16_t qidx __rte_unused,
 
 static int
 pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
-                               struct rte_ring *ring, struct rte_mempool *mp,
-                               uint16_t operation)
+                           struct rte_ring *ring, struct rte_mempool *mp,
+                           uint16_t operation, const void *filter)
 {
        uint16_t qid;
        struct pdump_rxtx_cbs *cbs = NULL;
@@ -143,6 +150,7 @@ pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
                        }
                        cbs->ring = ring;
                        cbs->mp = mp;
+                       cbs->filter = filter;
                        cbs->cb = rte_eth_add_first_rx_callback(port, qid,
                                                                pdump_rx, cbs);
                        if (cbs->cb == NULL) {
@@ -178,8 +186,8 @@ pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
 
 static int
 pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
-                               struct rte_ring *ring, struct rte_mempool *mp,
-                               uint16_t operation)
+                           struct rte_ring *ring, struct rte_mempool *mp,
+                           uint16_t operation, const void *filter)
 {
 
        uint16_t qid;
@@ -198,6 +206,7 @@ pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
                        }
                        cbs->ring = ring;
                        cbs->mp = mp;
+                       cbs->filter = filter;
                        cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx,
                                                                cbs);
                        if (cbs->cb == NULL) {
@@ -241,6 +250,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p)
        uint16_t operation;
        struct rte_ring *ring;
        struct rte_mempool *mp;
+       const void *filter = NULL;
 
        flags = p->flags;
        operation = p->op;
@@ -256,6 +266,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p)
                queue = p->data.en_v1.queue;
                ring = p->data.en_v1.ring;
                mp = p->data.en_v1.mp;
+               filter = p->data.en_v1.filter;
        } else {
                ret = rte_eth_dev_get_port_by_name(p->data.dis_v1.device,
                                &port);
@@ -299,7 +310,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p)
        if (flags & RTE_PDUMP_FLAG_RX) {
                end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1;
                ret = pdump_register_rx_callbacks(end_q, port, queue, ring, mp,
-                                                       operation);
+                                                 operation, filter);
                if (ret < 0)
                        return ret;
        }
@@ -308,7 +319,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p)
        if (flags & RTE_PDUMP_FLAG_TX) {
                end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_tx_q : queue + 1;
                ret = pdump_register_tx_callbacks(end_q, port, queue, ring, mp,
-                                                       operation);
+                                                 operation, filter);
                if (ret < 0)
                        return ret;
        }
@@ -424,12 +435,41 @@ pdump_validate_port(uint16_t port, char *name)
 }
 
 static int
-pdump_prepare_client_request(char *device, uint16_t queue,
-                               uint32_t flags,
-                               uint16_t operation,
-                               struct rte_ring *ring,
-                               struct rte_mempool *mp,
-                               void *filter)
+pdump_validate_filter(const void *filter, unsigned int len)
+{
+       size_t alloc_len;
+
+       if (filter == NULL)
+               return 0;
+
+       /* must be in malloc memory to be accessible in primary */
+       if (rte_malloc_validate(filter, &alloc_len) != 0) {
+               PDUMP_LOG(ERR, "filter is not in rte_malloc memory\n");
+               rte_errno = EINVAL;
+               return -1;
+       }
+
+       if (len * BPF_INS_SIZE > alloc_len) {
+               PDUMP_LOG(ERR, "filter length error\n");
+               rte_errno = EINVAL;
+               return -1;
+       }
+
+       if (!rte_pcap_validate_filter(filter, len)) {
+               PDUMP_LOG(ERR, "filter is not valid BPF code\n");
+               rte_errno = EINVAL;
+               return -1;
+       }
+       return 0;
+}
+
+static int
+pdump_prepare_client_request(const char *device, uint16_t queue,
+                            uint32_t flags,
+                            uint16_t operation,
+                            struct rte_ring *ring,
+                            struct rte_mempool *mp,
+                            const void *filter)
 {
        int ret = -1;
        struct rte_mp_msg mp_req, *mp_rep;
@@ -476,14 +516,13 @@ pdump_prepare_client_request(char *device, uint16_t queue,
 }
 
 int
-rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags,
-                       struct rte_ring *ring,
-                       struct rte_mempool *mp,
-                       void *filter)
+rte_pdump_enable_v1911(uint16_t port, uint16_t queue, uint32_t flags,
+                      struct rte_ring *ring, struct rte_mempool *mp,
+                      const void *filter, uint32_t filter_len)
 {
 
-       int ret = 0;
        char name[DEVICE_ID_SIZE];
+       int ret;
 
        ret = pdump_validate_port(port, name);
        if (ret < 0)
@@ -492,36 +531,86 @@ rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags,
        if (ret < 0)
                return ret;
        ret = pdump_validate_flags(flags);
+       if (ret < 0)
+               return ret;
+       ret = pdump_validate_filter(filter, filter_len);
        if (ret < 0)
                return ret;
 
        ret = pdump_prepare_client_request(name, queue, flags,
-                                               ENABLE, ring, mp, filter);
+                                          ENABLE, ring, mp, filter);
 
        return ret;
 }
+BIND_DEFAULT_SYMBOL(rte_pdump_enable, _v1911, 19.11);
+MAP_STATIC_SYMBOL(int rte_pdump_enable(uint16_t port, uint16_t queue,
+                                      uint32_t flags, struct rte_ring *ring,
+                                      struct rte_mempool *mp,
+                                      const void *filter, uint32_t len),
+                 rte_pdump_enable_v1911);
 
 int
-rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
-                               uint32_t flags,
-                               struct rte_ring *ring,
-                               struct rte_mempool *mp,
-                               void *filter)
+rte_pdump_enable_v1607(uint16_t port, uint16_t queue, uint32_t flags,
+                      struct rte_ring *ring,
+                      struct rte_mempool *mp,
+                      void *filter)
 {
-       int ret = 0;
+       if (filter != NULL)
+               PDUMP_LOG(WARNING, "filter not supported in this version\n");
+
+       return rte_pdump_enable_v1911(port, queue, flags, ring, mp,
+                                     NULL, 0);
+}
+VERSION_SYMBOL(rte_pdump_enable, _v1607, 16.07);
+
+int
+rte_pdump_enable_by_deviceid_v1911(const char *device_id, uint16_t queue,
+                                  uint32_t flags,
+                                  struct rte_ring *ring,
+                                  struct rte_mempool *mp,
+                                  const void *filter, uint32_t filter_len)
+{
+       int ret;
 
        ret = pdump_validate_ring_mp(ring, mp);
        if (ret < 0)
                return ret;
        ret = pdump_validate_flags(flags);
+       if (ret < 0)
+               return ret;
+       ret = pdump_validate_filter(filter, filter_len);
        if (ret < 0)
                return ret;
 
        ret = pdump_prepare_client_request(device_id, queue, flags,
-                                               ENABLE, ring, mp, filter);
+                                          ENABLE, ring, mp, filter);
 
        return ret;
 }
+BIND_DEFAULT_SYMBOL(rte_pdump_enable_by_deviceid, _v1911, 19.11);
+MAP_STATIC_SYMBOL(int rte_pdump_enable_by_deviceid(const char *device_id,
+                                                  uint16_t queue,
+                                                  uint32_t flags,
+                                                  struct rte_ring *ring,
+                                                  struct rte_mempool *mp,
+                                                  const void *filter,
+                                                  uint32_t len),
+                 rte_pdump_enable_by_deviceid_v1911);
+
+int
+rte_pdump_enable_by_deviceid_v1607(char *device_id, uint16_t queue,
+                                  uint32_t flags,
+                                  struct rte_ring *ring,
+                                  struct rte_mempool *mp,
+                                  void *filter)
+{
+       if (filter != NULL)
+               PDUMP_LOG(WARNING, "filter not supported in this version\n");
+
+       return rte_pdump_enable_by_deviceid_v1911(device_id, queue, flags,
+                                                 ring, mp, NULL, 0);
+}
+VERSION_SYMBOL(rte_pdump_enable_by_deviceid, _v1607, 16.07);
 
 int
 rte_pdump_disable(uint16_t port, uint16_t queue, uint32_t flags)
diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h
index 6b00fc17aeb2..12cb46f8b0e9 100644
--- a/lib/librte_pdump/rte_pdump.h
+++ b/lib/librte_pdump/rte_pdump.h
@@ -68,17 +68,25 @@ rte_pdump_uninit(void);
  * @param mp
  *  mempool on to which original packets will be mirrored or duplicated.
  * @param filter
- *  place holder for packet filtering.
+ *  filter to apply to incoming packet (classic BPF)
+ * @param len
+ *  length of filter (in BPF instructions)
  *
  * @return
  *    0 on success, -1 on error, rte_errno is set accordingly.
  */
-
 int
 rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags,
-               struct rte_ring *ring,
-               struct rte_mempool *mp,
-               void *filter);
+                struct rte_ring *ring, struct rte_mempool *mp,
+                const void *filter, uint32_t len);
+int
+rte_pdump_enable_v1607(uint16_t port, uint16_t queue, uint32_t flags,
+                      struct rte_ring *ring, struct rte_mempool *mp,
+                      void *filter);
+int
+rte_pdump_enable_v1911(uint16_t port, uint16_t queue, uint32_t flags,
+                      struct rte_ring *ring, struct rte_mempool *mp,
+                      const void *filter, uint32_t len);
 
 /**
  * Disables packet capturing on given port and queue.
@@ -118,18 +126,29 @@ rte_pdump_disable(uint16_t port, uint16_t queue, uint32_t flags);
  * @param mp
  *  mempool on to which original packets will be mirrored or duplicated.
  * @param filter
- *  place holder for packet filtering.
+ *  filter to apply to incoming packet (classic BPF)
+ * @param len
+ *  length of filter (in BPF instructions)
  *
  * @return
  *    0 on success, -1 on error, rte_errno is set accordingly.
  */
-
 int
-rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
-                               uint32_t flags,
-                               struct rte_ring *ring,
-                               struct rte_mempool *mp,
-                               void *filter);
+rte_pdump_enable_by_deviceid(const char *device_id, uint16_t queue,
+                            uint32_t flags,
+                            struct rte_ring *ring,
+                            struct rte_mempool *mp,
+                            const void *filter, uint32_t len);
+int
+rte_pdump_enable_by_deviceid_v1607(char *device_id, uint16_t queue,
+                                  uint32_t flags, struct rte_ring *ring,
+                                  struct rte_mempool *mp,
+                                  void *filter);
+int
+rte_pdump_enable_by_deviceid_v1911(const char *device_id, uint16_t queue,
+                                  uint32_t flags, struct rte_ring *ring,
+                                  struct rte_mempool *mp,
+                                  const void *filter, uint32_t len);
 
 /**
  * Disables packet capturing on given device_id and queue.
@@ -151,7 +170,16 @@ rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
  */
 int
 rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue,
-                               uint32_t flags);
+                             uint32_t flags);
+
+
+/* internal */
+int
+rte_pcap_filter(const void *filter, const struct rte_mbuf *m);
+
+/* internal */
+int
+rte_pcap_validate_filter(const void *filter, uint32_t len);
 
 #ifdef __cplusplus
 }
diff --git a/lib/librte_pdump/rte_pdump_version.map b/lib/librte_pdump/rte_pdump_version.map
index 3e744f30123c..e78ba5a8350a 100644
--- a/lib/librte_pdump/rte_pdump_version.map
+++ b/lib/librte_pdump/rte_pdump_version.map
@@ -10,3 +10,10 @@ DPDK_16.07 {
 
        local: *;
 };
+
+DPDK_19.11 {
+       global:
+
+       rte_pdump_enable;
+       rte_pdump_enable_by_deviceid;
+} DPDK_16.07;
-- 
2.20.1

Reply via email to