My last attempt in "let etherip(4) output directly to the ip stack" didn't go so well. This one at least doesn't break bridge(4) on Ethernet.
This diff effectively turns if_enqueue into a per interface function pointer. Things still call if_enqueue(), which does some pf stuff, and then it calls ifp->if_enqueue. By default, that function pointer is if_enqueue_ifq() which puts the packet on the interface send queues. Ethernet interfaces intercept the call to if_enqueue_ifq(). They get set up to call ether_enqueue, which does the bridge stuff and then calls ac->ac_enqueue, which shadows the ifp->if_enqueue functionality. This takes Ethernet bridge(4) knowledge out of the generic interface code. Non-Ethernet things that want to bypass ifqs can override ifp->if_enqueue. Ethernet things can override ac->ac_enqueue. This diff includes a change to vlan(4) to take advantage of this. It overrides ac_enqueue with vlan_enqueue(). So a packet sent out a vlan interface will go into if_enqueue(), which will call ether_enqueue() because ether_ifattach set that up. ether_enqueue() will call ac->ac_enqueue which is vlan_enqueue on vlan interfaces, which will eventually call if_enqueue() on the parent interface. There are some things I'd like to try if this goes in. eg, it may be possible to have ether_enqueue simple call ac->ac_enqueue, and let bridge(4) swap it out with bridge_enqueue code. Doing that now would complicate the diff though. Anyway, how's this look, esp relative to the previous diff? Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.569 diff -u -p -r1.569 if.c --- net/if.c 11 Dec 2018 22:08:57 -0000 1.569 +++ net/if.c 19 Dec 2018 06:48:37 -0000 @@ -632,6 +632,8 @@ if_attach_common(struct ifnet *ifp) if (ifp->if_rtrequest == NULL) ifp->if_rtrequest = if_rtrequest_dummy; + if (ifp->if_enqueue == NULL) + ifp->if_enqueue = if_enqueue_ifq; ifp->if_llprio = IFQ_DEFPRIO; SRPL_INIT(&ifp->if_inputs); @@ -683,34 +685,33 @@ if_qstart_compat(struct ifqueue *ifq) int if_enqueue(struct ifnet *ifp, struct mbuf *m) { - unsigned int idx; - struct ifqueue *ifq; - int error; - #if NPF > 0 if (m->m_pkthdr.pf.delay > 0) return (pf_delay_pkt(m, ifp->if_index)); -#endif - -#if NBRIDGE > 0 - if (ifp->if_bridgeport && (m->m_flags & M_PROTO1) == 0) { - KERNEL_LOCK(); - error = bridge_output(ifp, m, NULL, NULL); - KERNEL_UNLOCK(); - return (error); - } -#endif -#if NPF > 0 pf_pkt_addr_changed(m); #endif /* NPF > 0 */ - /* - * use the operations on the first ifq to pick which of the array - * gets this mbuf. - */ - idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m); - ifq = ifp->if_ifqs[idx]; + return ((*ifp->if_enqueue)(ifp, m)); +} + +int +if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m) +{ + struct ifqueue *ifq = &ifp->if_snd; + int error; + + if (ifp->if_nifqs > 1) { + unsigned int idx; + + /* + * use the operations on the first ifq to pick which of + * the array gets this mbuf. + */ + + idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m); + ifq = ifp->if_ifqs[idx]; + } error = ifq_enqueue(ifq, m); if (error) Index: net/if_ethersubr.c =================================================================== RCS file: /cvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.255 diff -u -p -r1.255 if_ethersubr.c --- net/if_ethersubr.c 12 Dec 2018 05:38:26 -0000 1.255 +++ net/if_ethersubr.c 19 Dec 2018 06:48:37 -0000 @@ -121,6 +121,13 @@ didn't get a copy, you may request one f #include <netmpls/mpls.h> #endif /* MPLS */ +#include "bridge.h" +#if NBRIDGE > 0 +#include <net/if_bridge.h> +#endif + +static int ether_enqueue(struct ifnet *, struct mbuf *); + u_int8_t etherbroadcastaddr[ETHER_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; u_int8_t etheranyaddr[ETHER_ADDR_LEN] = @@ -335,6 +342,26 @@ ether_output(struct ifnet *ifp, struct m return (if_enqueue(ifp, m)); } +static int +ether_enqueue(struct ifnet *ifp, struct mbuf *m) +{ + struct arpcom *ac = (struct arpcom *)ifp; + +#if NBRIDGE > 0 + if (ifp->if_bridgeport && (m->m_flags & M_PROTO1) == 0) { + int error; + + KERNEL_LOCK(); + error = bridge_output(ifp, m, NULL, NULL); + KERNEL_UNLOCK(); + + return (error); + } +#endif + + return ((*ac->ac_enqueue)(ifp, m)); +} + /* * Process a received Ethernet packet; * the packet is in the mbuf chain m without @@ -518,6 +545,10 @@ ether_ifattach(struct ifnet *ifp) if (ifp->if_hardmtu == 0) ifp->if_hardmtu = ETHERMTU; + + ifp->if_enqueue = ether_enqueue; + if (ac->ac_enqueue == NULL) + ac->ac_enqueue = if_enqueue_ifq; if_alloc_sadl(ifp); memcpy(LLADDR(ifp->if_sadl), ac->ac_enaddr, ifp->if_addrlen); Index: net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.92 diff -u -p -r1.92 if_var.h --- net/if_var.h 19 Dec 2018 05:31:28 -0000 1.92 +++ net/if_var.h 19 Dec 2018 06:48:37 -0000 @@ -162,6 +162,7 @@ struct ifnet { /* and the entries */ /* link level output function */ int (*if_ll_output)(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); + int (*if_enqueue)(struct ifnet *, struct mbuf *); void (*if_start)(struct ifnet *); /* initiate output */ int (*if_ioctl)(struct ifnet *, u_long, caddr_t); /* ioctl hook */ void (*if_watchdog)(struct ifnet *); /* timer routine */ @@ -331,6 +332,7 @@ extern struct ifnet_head ifnet; void if_start(struct ifnet *); int if_enqueue(struct ifnet *, struct mbuf *); +int if_enqueue_ifq(struct ifnet *, struct mbuf *); void if_input(struct ifnet *, struct mbuf_list *); void if_input_process(struct ifnet *, struct mbuf_list *); int if_input_local(struct ifnet *, struct mbuf *, sa_family_t); Index: net/if_vlan.c =================================================================== RCS file: /cvs/src/sys/net/if_vlan.c,v retrieving revision 1.179 diff -u -p -r1.179 if_vlan.c --- net/if_vlan.c 16 Nov 2018 08:43:08 -0000 1.179 +++ net/if_vlan.c 19 Dec 2018 06:48:37 -0000 @@ -58,6 +58,7 @@ #include <sys/sockio.h> #include <sys/systm.h> #include <sys/rwlock.h> +#include <sys/percpu.h> #include <net/if.h> #include <net/if_dl.h> @@ -85,6 +86,7 @@ int vlan_clone_create(struct if_clone *, int vlan_clone_destroy(struct ifnet *); int vlan_input(struct ifnet *, struct mbuf *, void *); +int vlan_enqueue(struct ifnet *, struct mbuf *); void vlan_start(struct ifqueue *ifq); int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); @@ -156,6 +158,7 @@ vlan_clone_create(struct if_clone *ifc, { struct ifvlan *ifv; struct ifnet *ifp; + struct arpcom *ac; ifv = malloc(sizeof(*ifv), M_DEVBUF, M_WAITOK|M_ZERO); LIST_INIT(&ifv->vlan_mc_listhead); @@ -181,6 +184,11 @@ vlan_clone_create(struct if_clone *ifc, ifp->if_ioctl = vlan_ioctl; ifp->if_hardmtu = 0xffff; ifp->if_link_state = LINK_STATE_DOWN; + + ac = (struct arpcom *)ifp; + ac->ac_enqueue = vlan_enqueue; + + if_counters_alloc(ifp); if_attach(ifp); ether_ifattach(ifp); ifp->if_hdrlen = EVL_ENCAPLEN; @@ -240,69 +248,96 @@ vlan_mplstunnel(int ifidx) } void +vlan_transmit(struct ifvlan *ifv, struct ifnet *ifp0, struct mbuf *m) +{ + struct ifnet *ifp = &ifv->ifv_if; + int txprio = ifv->ifv_prio; + uint8_t prio; + +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT); +#endif /* NBPFILTER > 0 */ + + prio = (txprio == IF_HDRPRIO_PACKET) ? + m->m_pkthdr.pf.prio : txprio; + + /* IEEE 802.1p has prio 0 and 1 swapped */ + if (prio <= 1) + prio = !prio; + + /* + * If this packet came from a pseudowire it means it already + * has all tags it needs, so just output it. + */ + if (vlan_mplstunnel(m->m_pkthdr.ph_ifidx)) { + /* NOTHING */ + + /* + * If the underlying interface cannot do VLAN tag insertion + * itself, create an encapsulation header. + */ + } else if ((ifp0->if_capabilities & IFCAP_VLAN_HWTAGGING) && + (ifv->ifv_type == ETHERTYPE_VLAN)) { + m->m_pkthdr.ether_vtag = ifv->ifv_tag + + (prio << EVL_PRIO_BITS); + m->m_flags |= M_VLANTAG; + } else { + m = vlan_inject(m, ifv->ifv_type, ifv->ifv_tag | + (prio << EVL_PRIO_BITS)); + if (m == NULL) { + counters_inc(ifp->if_counters, ifc_oerrors); + return; + } + } + + if (if_enqueue(ifp0, m)) + counters_inc(ifp->if_counters, ifc_oerrors); +} + +int +vlan_enqueue(struct ifnet *ifp, struct mbuf *m) +{ + struct ifnet *ifp0; + struct ifvlan *ifv; + + if (!ifq_is_priq(&ifp->if_snd)) + return (if_enqueue_ifq(ifp, m)); + + counters_pkt(ifp->if_counters, + ifc_opackets, ifc_obytes, m->m_pkthdr.len); + + ifv = ifp->if_softc; + ifp0 = if_get(ifv->ifv_ifp0); + if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) { + m_freem(m); + return (ENETDOWN); + } + + vlan_transmit(ifv, ifp0, m); + + if_put(ifp0); + + return (0); +} + +void vlan_start(struct ifqueue *ifq) { struct ifnet *ifp = ifq->ifq_if; - struct ifvlan *ifv; + struct ifvlan *ifv = ifp->if_softc; struct ifnet *ifp0; struct mbuf *m; - int txprio; - uint8_t prio; ifv = ifp->if_softc; ifp0 = if_get(ifv->ifv_ifp0); - if (ifp0 == NULL || (ifp0->if_flags & (IFF_UP|IFF_RUNNING)) != - (IFF_UP|IFF_RUNNING)) { + if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) { ifq_purge(ifq); goto leave; } - txprio = ifv->ifv_prio; - - while ((m = ifq_dequeue(ifq)) != NULL) { -#if NBPFILTER > 0 - if (ifp->if_bpf) - bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT); -#endif /* NBPFILTER > 0 */ - - prio = (txprio == IF_HDRPRIO_PACKET) ? - m->m_pkthdr.pf.prio : txprio; - - /* IEEE 802.1p has prio 0 and 1 swapped */ - if (prio <= 1) - prio = !prio; - - /* - * If this packet came from a pseudowire it means it already - * has all tags it needs, so just output it. - */ - if (vlan_mplstunnel(m->m_pkthdr.ph_ifidx)) { - /* NOTHING */ - - /* - * If the underlying interface cannot do VLAN tag insertion - * itself, create an encapsulation header. - */ - } else if ((ifp0->if_capabilities & IFCAP_VLAN_HWTAGGING) && - (ifv->ifv_type == ETHERTYPE_VLAN)) { - m->m_pkthdr.ether_vtag = ifv->ifv_tag + - (prio << EVL_PRIO_BITS); - m->m_flags |= M_VLANTAG; - } else { - m = vlan_inject(m, ifv->ifv_type, ifv->ifv_tag | - (prio << EVL_PRIO_BITS)); - if (m == NULL) { - ifp->if_oerrors++; - continue; - } - } - - if (if_enqueue(ifp0, m)) { - ifp->if_oerrors++; - ifq->ifq_errors++; - continue; - } - } + while ((m = ifq_dequeue(ifq)) != NULL) + vlan_transmit(ifv, ifp0, m); leave: if_put(ifp0); Index: netinet/if_ether.h =================================================================== RCS file: /cvs/src/sys/netinet/if_ether.h,v retrieving revision 1.74 diff -u -p -r1.74 if_ether.h --- netinet/if_ether.h 11 Dec 2018 01:27:08 -0000 1.74 +++ netinet/if_ether.h 19 Dec 2018 06:48:37 -0000 @@ -204,6 +204,7 @@ struct arpcom { LIST_HEAD(, ether_multi) ac_multiaddrs; /* list of multicast addrs */ int ac_multicnt; /* length of ac_multiaddrs */ int ac_multirangecnt; /* number of mcast ranges */ + int (*ac_enqueue)(struct ifnet *, struct mbuf *); };