On Mon, Apr 04, 2016 at 06:41:00PM +0200, Martin Pieuchot wrote:
> On 04/04/16(Mon) 22:56, David Gwynne wrote:
> > On Mon, Apr 04, 2016 at 08:07:47PM +1000, David Gwynne wrote:
> > > > On 4 Apr 2016, at 6:41 PM, Martin Pieuchot <[email protected]> wrote:
> > > > On 04/04/16(Mon) 13:09, David Gwynne wrote:
> > > >> #ifdef DDB
> > > >> @@ -593,6 +594,12 @@ if_enqueue(struct ifnet *ifp, struct mbu
> > > >> struct mbuf_queue if_input_queue = MBUF_QUEUE_INITIALIZER(8192,
> > > >> IPL_NET);
> > > >> struct task if_input_task = TASK_INITIALIZER(if_input_process,
> > > >> &if_input_queue);
> > > >>
> > > >> +int
> > > >> +if_input_filter(void *if_bpf, const struct mbuf *m)
> > > >> +{
> > > >> + return bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN);
> > > >> +}
> > > >> +
> > > >
> > > > I find this filtering function confusing. What about introducing a
> > > > bpf_mtap() variant dealing with an ``mbuf_list'' : bpf_mltap()?
> > >
> > > i could, but passing the direction through is difficult. i could have
> > > different wrappers for different directions though.
>
> struct mbuf_list bpf_mltap(cadd_t arg, struct mbuf_list *ml, u_int dir);
>
> How is that difficult?
presuming bpf_mltap is just a wrapper around ml_filter, you need a
way to carry the direction through to the per mbuf processing. this
is the least worst way i came up with:
struct bpf_mltap_ctx {
caddr_t arg;
u_int direction;
};
int
bpf_mltap_ether_filter(void *v, const struct mbuf *m)
{
struct bpf_mltap_ctx *ctx = v;
return bpf_mtap_ether(ctx->arg, m, ctx->direction);
}
struct mbuf *
bpf_mltap_ether(caddr_t arg, struct mbuf_list *ml, u_int direction)
{
struct bpf_mltap_ctx ctx;
ctx.arg = arg;
ctx.direction = direction;
return ml_filter(ml, bpf_mltap_ether_filter, &ctx);
}
the alternative is to iterate over the list in the mltap function
itself and test them one by one. if you thought freeing a list of
mbufs was hairy code that shouldnt escape uipc_mbuf.c, you should
have a look at what ml_filter does to remove packets in place. the
simpler option is to dequeue from the list and process them one at
a time that way. like this:
struct mbuf *
bpf_mltap_ether(caddr_t arg, struct mbuf_list *ml, u_int direction)
{
struct mbuf_list keep = MBUF_LIST_INITIALIZER();
struct mbuf_list drop = MBUF_LIST_INITIALIZER();
struct mbuf *m;
while ((m = ml_dequeue(ml)) != NULL) {
if (bpf_mtap_ether(arg, m, direction) != 0)
ml_enqueue(&drop, m);
else
ml_enqueue(&keep, m);
}
*ml = keep;
return ml_dechain(&drop);
}
im not a huge fan of assigning mbuf_lists by value outside uipc_mbuf.c
either, it assumes too much about whats inside an mbuf_list. however,
the alternative is while ((m = ml_dequeue(&keep)) != NULL)
ml_enqueue(ml, m); which is a lot of work for the common case.
it seems like a lot of effort to abstract away something thats
only called from one place.
>
> > > > What about returning a mbuf (or a mbuf_list) instead of a boolean and
> > > > check for NULL? This would fit in the input_handler model where a
> > > > packet is "consumed", here by bpf(4), and processing should stop.
> > > >
> > > > You could then write:
> > > >
> > > > m = bpf_mtap(if->if_bpf, m, BPF_DIRECTION_IN);
> > > > if (m == NULL)
> > > > return;
> > >
> > > i wouldnt be able to pass the mbuf as a const struct mbuf * and return it
> > > without casting it to drop the const qualifier. to me it feels more like
> > > its being tested than consumed.
>
> It is tested because you define it as const, or is it the other way
> around?
because bpf_tap things in our kernel and others is traditionally a
thing that lets userland try and get packets, and the implicit
behaviour has always been that it only reads the mbufs. the const
makes that explicit.
> > also consider that if bpf_mtap freed the mbuf, every caller would
> > have to be fixed to check for that. at the moment we only respect
> > drops on incoming packets.
>
> So put a KASSERT if the direction is outgoing so you don't have to fix
> all the callers.
so just the inbound callers in all the wireless drivers and pseudo
devices would need to be fixed.
> > void
> > if_input(struct ifnet *ifp, struct mbuf_list *ml)
> > {
> > @@ -614,8 +621,15 @@ if_input(struct ifnet *ifp, struct mbuf_
> > #if NBPFILTER > 0
> > if_bpf = ifp->if_bpf;
> > if (if_bpf) {
> > - MBUF_LIST_FOREACH(ml, m)
> > - bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN);
> > + struct mbuf *m0;
> > +
> > + m0 = ml_filter(ml, if_input_filter, if_bpf);
> > + while (m0 != NULL) {
> > + m = m0;
> > + m0 = m->m_nextpkt;
> > +
> > + m_freem(m);
> > + }
>
> Do you really want to add this mbuf horror show here? It looks like
> you've spent too much time in kern/uipc_mbuf.c recently!
i guess queue.h makes us feel upset when we see bare list manipulations.
ive replaced this with m_purge.
Index: net/bpf.c
===================================================================
RCS file: /cvs/src/sys/net/bpf.c,v
retrieving revision 1.138
diff -u -p -r1.138 bpf.c
--- net/bpf.c 2 Apr 2016 08:49:49 -0000 1.138
+++ net/bpf.c 8 Apr 2016 05:03:03 -0000
@@ -92,7 +92,7 @@ LIST_HEAD(, bpf_d) bpf_d_list;
void bpf_allocbufs(struct bpf_d *);
void bpf_freed(struct bpf_d *);
void bpf_ifname(struct ifnet *, struct ifreq *);
-int _bpf_mtap(caddr_t, struct mbuf *, u_int,
+int _bpf_mtap(caddr_t, const struct mbuf *, u_int,
void (*)(const void *, void *, size_t));
void bpf_mcopy(const void *, void *, size_t);
int bpf_movein(struct uio *, u_int, struct mbuf **,
@@ -1206,14 +1206,14 @@ bpf_mcopy(const void *src_arg, void *dst
* like bpf_mtap, but copy fn can be given. used by various bpf_mtap*
*/
int
-_bpf_mtap(caddr_t arg, struct mbuf *m, u_int direction,
+_bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction,
void (*cpfn)(const void *, void *, size_t))
{
struct bpf_if *bp = (struct bpf_if *)arg;
struct srpl_iter i;
struct bpf_d *d;
size_t pktlen, slen;
- struct mbuf *m0;
+ const struct mbuf *m0;
struct timeval tv;
int gottime = 0;
int drop = 0;
@@ -1267,9 +1267,6 @@ _bpf_mtap(caddr_t arg, struct mbuf *m, u
}
SRPL_LEAVE(&i, d);
- if (drop)
- m->m_flags |= M_FILDROP;
-
return (drop);
}
@@ -1277,7 +1274,7 @@ _bpf_mtap(caddr_t arg, struct mbuf *m, u
* Incoming linkage from device drivers, when packet is in an mbuf chain.
*/
int
-bpf_mtap(caddr_t arg, struct mbuf *m, u_int direction)
+bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
return _bpf_mtap(arg, m, direction, NULL);
}
@@ -1292,28 +1289,22 @@ bpf_mtap(caddr_t arg, struct mbuf *m, u_
* it or keep a pointer to it.
*/
int
-bpf_mtap_hdr(caddr_t arg, caddr_t data, u_int dlen, struct mbuf *m,
+bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m,
u_int direction, void (*cpfn)(const void *, void *, size_t))
{
struct m_hdr mh;
- struct mbuf *m0;
- int drop;
+ const struct mbuf *m0;
if (dlen > 0) {
mh.mh_flags = 0;
- mh.mh_next = m;
+ mh.mh_next = (struct mbuf *)m;
mh.mh_len = dlen;
- mh.mh_data = data;
+ mh.mh_data = (caddr_t)data;
m0 = (struct mbuf *)&mh;
} else
m0 = m;
- drop = _bpf_mtap(arg, m0, direction, cpfn);
-
- if (m0 != m)
- m->m_flags |= m0->m_flags & M_FILDROP;
-
- return (drop);
+ return _bpf_mtap(arg, m0, direction, cpfn);
}
/*
@@ -1326,7 +1317,7 @@ bpf_mtap_hdr(caddr_t arg, caddr_t data,
* it or keep a pointer to it.
*/
int
-bpf_mtap_af(caddr_t arg, u_int32_t af, struct mbuf *m, u_int direction)
+bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
u_int32_t afh;
@@ -1346,7 +1337,7 @@ bpf_mtap_af(caddr_t arg, u_int32_t af, s
* it or keep a pointer to it.
*/
int
-bpf_mtap_ether(caddr_t arg, struct mbuf *m, u_int direction)
+bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
struct ether_vlan_header evh;
Index: net/bpf.h
===================================================================
RCS file: /cvs/src/sys/net/bpf.h,v
retrieving revision 1.55
diff -u -p -r1.55 bpf.h
--- net/bpf.h 3 Apr 2016 01:37:26 -0000 1.55
+++ net/bpf.h 8 Apr 2016 05:03:03 -0000
@@ -289,11 +289,11 @@ struct mbuf;
int bpf_validate(struct bpf_insn *, int);
int bpf_tap(caddr_t, u_char *, u_int, u_int);
-int bpf_mtap(caddr_t, struct mbuf *, u_int);
-int bpf_mtap_hdr(caddr_t, caddr_t, u_int, struct mbuf *, u_int,
+int bpf_mtap(caddr_t, const struct mbuf *, u_int);
+int bpf_mtap_hdr(caddr_t, const void *, u_int, const struct mbuf *, u_int,
void (*)(const void *, void *, size_t));
-int bpf_mtap_af(caddr_t, u_int32_t, struct mbuf *, u_int);
-int bpf_mtap_ether(caddr_t, struct mbuf *, u_int);
+int bpf_mtap_af(caddr_t, u_int32_t, const struct mbuf *, u_int);
+int bpf_mtap_ether(caddr_t, const struct mbuf *, u_int);
void bpfattach(caddr_t *, struct ifnet *, u_int, u_int);
void bpfdetach(struct ifnet *);
void bpfilterattach(int);
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.429
diff -u -p -r1.429 if.c
--- net/if.c 16 Mar 2016 12:08:09 -0000 1.429
+++ net/if.c 8 Apr 2016 05:03:03 -0000
@@ -147,6 +147,7 @@ int if_group_egress_build(void);
void if_watchdog_task(void *);
+int if_input_filter(void *, const struct mbuf *);
void if_input_process(void *);
#ifdef DDB
@@ -593,6 +594,12 @@ if_enqueue(struct ifnet *ifp, struct mbu
struct mbuf_queue if_input_queue = MBUF_QUEUE_INITIALIZER(8192, IPL_NET);
struct task if_input_task = TASK_INITIALIZER(if_input_process,
&if_input_queue);
+int
+if_input_filter(void *if_bpf, const struct mbuf *m)
+{
+ return bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN);
+}
+
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
@@ -614,8 +621,8 @@ if_input(struct ifnet *ifp, struct mbuf_
#if NBPFILTER > 0
if_bpf = ifp->if_bpf;
if (if_bpf) {
- MBUF_LIST_FOREACH(ml, m)
- bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN);
+ m = ml_filter(ml, if_input_filter, if_bpf);
+ m_purge(m);
}
#endif
Index: net/if_ethersubr.c
===================================================================
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.235
diff -u -p -r1.235 if_ethersubr.c
--- net/if_ethersubr.c 1 Apr 2016 04:03:35 -0000 1.235
+++ net/if_ethersubr.c 8 Apr 2016 05:03:03 -0000
@@ -340,11 +340,10 @@ ether_input(struct ifnet *ifp, struct mb
}
/*
- * If packet has been filtered by the bpf listener, drop it now
- * also HW vlan tagged packets that were not collected by vlan(4)
+ * HW vlan tagged packets that were not collected by vlan(4)
* must be dropped now.
*/
- if (m->m_flags & (M_FILDROP | M_VLANTAG)) {
+ if (ISSET(m->m_flags, M_VLANTAG)) {
m_freem(m);
return (1);
}
Index: net80211/ieee80211_input.c
===================================================================
RCS file: /cvs/src/sys/net80211/ieee80211_input.c,v
retrieving revision 1.169
diff -u -p -r1.169 ieee80211_input.c
--- net80211/ieee80211_input.c 22 Mar 2016 11:37:35 -0000 1.169
+++ net80211/ieee80211_input.c 8 Apr 2016 05:03:03 -0000
@@ -546,14 +546,14 @@ ieee80211_input(struct ifnet *ifp, struc
if (ifp->if_flags & IFF_DEBUG)
ieee80211_input_print(ic, ifp, wh, rxi);
#if NBPFILTER > 0
- if (ic->ic_rawbpf)
- bpf_mtap(ic->ic_rawbpf, m, BPF_DIRECTION_IN);
- /*
- * Drop mbuf if it was filtered by bpf. Normally, this is
- * done in ether_input() but IEEE 802.11 management frames
- * are a special case.
- */
- if (m->m_flags & M_FILDROP) {
+ if (bpf_mtap(ic->ic_rawbpf, m, BPF_DIRECTION_IN) != 0) {
+ /*
+ * Drop mbuf if it was filtered by
+ * bpf. Normally, this is done in
+ * ether_input() but IEEE 802.11
+ * management frames are a special
+ * case.
+ */
m_freem(m);
return;
}
Index: sys/mbuf.h
===================================================================
RCS file: /cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.211
diff -u -p -r1.211 mbuf.h
--- sys/mbuf.h 8 Apr 2016 03:13:38 -0000 1.211
+++ sys/mbuf.h 8 Apr 2016 05:03:03 -0000
@@ -181,7 +181,6 @@ struct mbuf {
/* mbuf pkthdr flags, also in m_flags */
#define M_VLANTAG 0x0020 /* ether_vtag is valid */
#define M_LOOP 0x0040 /* for Mbuf statistics */
-#define M_FILDROP 0x0080 /* dropped by bpf filter */
#define M_BCAST 0x0100 /* send/received as link-level
broadcast */
#define M_MCAST 0x0200 /* send/received as link-level
multicast */
#define M_CONF 0x0400 /* payload was encrypted (ESP-transport) */
@@ -194,13 +193,13 @@ struct mbuf {
#ifdef _KERNEL
#define M_BITS \
("\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_EXTWR\5M_PROTO1\6M_VLANTAG\7M_LOOP" \
- "\10M_FILDROP\11M_BCAST\12M_MCAST\13M_CONF\14M_AUTH\15M_TUNNEL" \
+ "\11M_BCAST\12M_MCAST\13M_CONF\14M_AUTH\15M_TUNNEL" \
"\16M_ZEROIZE\17M_COMP\20M_LINK0")
#endif
/* flags copied when copying m_pkthdr */
#define M_COPYFLAGS
(M_PKTHDR|M_EOR|M_PROTO1|M_BCAST|M_MCAST|M_CONF|M_COMP|\
- M_AUTH|M_LOOP|M_TUNNEL|M_LINK0|M_VLANTAG|M_FILDROP|\
+ M_AUTH|M_LOOP|M_TUNNEL|M_LINK0|M_VLANTAG|\
M_ZEROIZE)
/* Checksumming flags */