On Fri, May 13, 2022 at 12:19:46PM +1000, David Gwynne wrote: > On Thu, May 12, 2022 at 08:07:09PM +0200, Hrvoje Popovski wrote: > > On 12.5.2022. 20:04, Hrvoje Popovski wrote: > > > On 12.5.2022. 16:22, Hrvoje Popovski wrote: > > >> On 12.5.2022. 14:48, Claudio Jeker wrote: > > >>> I think the diff below may be enough to fix this issue. It drops the SMR > > >>> critical secition around the enqueue operation but uses a reference on > > >>> the > > >>> port insteadt to ensure that the device can't be removed during the > > >>> enqueue. Once the enqueue is finished we enter the SMR critical section > > >>> again and drop the reference. > > >>> > > >>> To make it clear that the SMR_TAILQ remains intact while a refcount is > > >>> held I moved refcnt_finalize() above SMR_TAILQ_REMOVE_LOCKED(). This is > > >>> not strictly needed since the next pointer remains valid up until the > > >>> smr_barrier() call but I find this a bit easier to understand. > > >>> First make sure nobody else holds a reference then remove the port from > > >>> the list. > > >>> > > >>> I currently have no test setup to verify this but I hope someone else > > >>> can > > >>> give this a spin. > > >> Hi, > > >> > > >> for now veb seems stable and i can't panic box although it should, but > > >> please give me few more hours to torture it properly. > > > > > > > > > I can trigger panic in veb with this diff. > > > > > > Thank you .. > > > > > > > > > > I can't trigger ... :))) sorry .. > > sorry i'm late to the party. can you try this diff? > > this diff replaces the list of ports with an array/map of ports. > the map takes references to all the ports, so the forwarding paths > just have to hold a reference to the map to be able to use all the > ports. the forwarding path uses smr to get hold of a map, takes a map > ref, and then leaves the smr crit section before iterating over the map > and pushing packets. > > this means we should only take and release a single refcnt when > we're pushing packets out any number of ports. > > if no span ports are configured, then there's no span port map and > we don't try and take a ref, we can just return early. > > we also only take and release a single refcnt when we forward the > actual packet. forwarding to a single port provided by an etherbridge > lookup already takes/releases the single port ref. if it falls > through that for unknown unicast or broadcast/multicast, then it's > a single refcnt for the current map of all ports.
This is very smart and probably better since it uses less atomic instructions per packet. Diff reads fine. OK claudio@ > Index: if_veb.c > =================================================================== > RCS file: /cvs/src/sys/net/if_veb.c,v > retrieving revision 1.25 > diff -u -p -r1.25 if_veb.c > --- if_veb.c 4 Jan 2022 06:32:39 -0000 1.25 > +++ if_veb.c 13 May 2022 02:01:43 -0000 > @@ -139,13 +139,13 @@ struct veb_port { > struct veb_rule_list p_vr_list[2]; > #define VEB_RULE_LIST_OUT 0 > #define VEB_RULE_LIST_IN 1 > - > - SMR_TAILQ_ENTRY(veb_port) p_entry; > }; > > struct veb_ports { > - SMR_TAILQ_HEAD(, veb_port) l_list; > - unsigned int l_count; > + struct refcnt m_refs; > + unsigned int m_count; > + > + /* followed by an array of veb_port pointers */ > }; > > struct veb_softc { > @@ -155,8 +155,8 @@ struct veb_softc { > struct etherbridge sc_eb; > > struct rwlock sc_rule_lock; > - struct veb_ports sc_ports; > - struct veb_ports sc_spans; > + struct veb_ports *sc_ports; > + struct veb_ports *sc_spans; > }; > > #define DPRINTF(_sc, fmt...) do { \ > @@ -184,8 +184,25 @@ static int veb_p_ioctl(struct ifnet *, u > static int veb_p_output(struct ifnet *, struct mbuf *, > struct sockaddr *, struct rtentry *); > > -static void veb_p_dtor(struct veb_softc *, struct veb_port *, > - const char *); > +static inline size_t > +veb_ports_size(unsigned int n) > +{ > + /* use of _ALIGN is inspired by CMSGs */ > + return _ALIGN(sizeof(struct veb_ports)) + > + n * sizeof(struct veb_port *); > +} > + > +static inline struct veb_port ** > +veb_ports_array(struct veb_ports *m) > +{ > + return (struct veb_port **)((caddr_t)m + _ALIGN(sizeof(*m))); > +} > + > +static void veb_ports_free(struct veb_ports *); > + > +static void veb_p_unlink(struct veb_softc *, struct veb_port *); > +static void veb_p_fini(struct veb_port *); > +static void veb_p_dtor(struct veb_softc *, struct veb_port *); > static int veb_add_port(struct veb_softc *, > const struct ifbreq *, unsigned int); > static int veb_del_port(struct veb_softc *, > @@ -271,8 +288,8 @@ veb_clone_create(struct if_clone *ifc, i > return (ENOMEM); > > rw_init(&sc->sc_rule_lock, "vebrlk"); > - SMR_TAILQ_INIT(&sc->sc_ports.l_list); > - SMR_TAILQ_INIT(&sc->sc_spans.l_list); > + sc->sc_ports = NULL; > + sc->sc_spans = NULL; > > ifp = &sc->sc_if; > > @@ -314,7 +331,10 @@ static int > veb_clone_destroy(struct ifnet *ifp) > { > struct veb_softc *sc = ifp->if_softc; > - struct veb_port *p, *np; > + struct veb_ports *mp, *ms; > + struct veb_port **ps; > + struct veb_port *p; > + unsigned int i; > > NET_LOCK(); > sc->sc_dead = 1; > @@ -326,10 +346,60 @@ veb_clone_destroy(struct ifnet *ifp) > if_detach(ifp); > > NET_LOCK(); > - SMR_TAILQ_FOREACH_SAFE_LOCKED(p, &sc->sc_ports.l_list, p_entry, np) > - veb_p_dtor(sc, p, "destroy"); > - SMR_TAILQ_FOREACH_SAFE_LOCKED(p, &sc->sc_spans.l_list, p_entry, np) > - veb_p_dtor(sc, p, "destroy"); > + > + /* > + * this is an upside down version of veb_p_dtor() and > + * veb_ports_destroy() to avoid a lot of malloc/free and > + * smr_barrier calls if we remove ports one by one. > + */ > + > + mp = SMR_PTR_GET_LOCKED(&sc->sc_ports); > + SMR_PTR_SET_LOCKED(&sc->sc_ports, NULL); > + if (mp != NULL) { > + ps = veb_ports_array(mp); > + for (i = 0; i < mp->m_count; i++) { > + veb_p_unlink(sc, ps[i]); > + } > + } > + > + ms = SMR_PTR_GET_LOCKED(&sc->sc_spans); > + SMR_PTR_SET_LOCKED(&sc->sc_spans, NULL); > + if (ms != NULL) { > + ps = veb_ports_array(ms); > + for (i = 0; i < ms->m_count; i++) > + veb_p_unlink(sc, ps[i]); > + } > + > + if (mp != NULL || ms != NULL) { > + smr_barrier(); /* everything everywhere all at once */ > + > + if (mp != NULL) { > + refcnt_finalize(&mp->m_refs, "vebdtor"); > + > + ps = veb_ports_array(mp); > + for (i = 0; i < mp->m_count; i++) { > + p = ps[i]; > + /* the ports map holds a port ref */ > + refcnt_rele(&p->p_refs); > + /* now we can finalize the port */ > + veb_p_fini(p); > + } > + > + veb_ports_free(mp); > + } > + if (ms != NULL) { > + refcnt_finalize(&ms->m_refs, "vebdtor"); > + > + ps = veb_ports_array(ms); > + for (i = 0; i < ms->m_count; i++) { > + p = ps[i]; > + refcnt_rele(&p->p_refs); > + veb_p_fini(p); > + } > + > + veb_ports_free(ms); > + } > + } > NET_UNLOCK(); > > etherbridge_destroy(&sc->sc_eb); > @@ -349,12 +419,25 @@ veb_span_input(struct ifnet *ifp0, struc > static void > veb_span(struct veb_softc *sc, struct mbuf *m0) > { > + struct veb_ports *sm; > + struct veb_port **ps; > struct veb_port *p; > struct ifnet *ifp0; > struct mbuf *m; > + unsigned int i; > > smr_read_enter(); > - SMR_TAILQ_FOREACH(p, &sc->sc_spans.l_list, p_entry) { > + sm = SMR_PTR_GET(&sc->sc_spans); > + if (sm != NULL) > + refcnt_take(&sm->m_refs); > + smr_read_leave(); > + if (sm == NULL) > + return; > + > + ps = veb_ports_array(sm); > + for (i = 0; i < sm->m_count; i++) { > + p = ps[i]; > + > ifp0 = p->p_ifp0; > if (!ISSET(ifp0->if_flags, IFF_RUNNING)) > continue; > @@ -367,7 +450,7 @@ veb_span(struct veb_softc *sc, struct mb > > if_enqueue(ifp0, m); /* XXX count error */ > } > - smr_read_leave(); > + refcnt_rele_wake(&sm->m_refs); > } > > static int > @@ -847,9 +930,12 @@ veb_broadcast(struct veb_softc *sc, stru > uint64_t src, uint64_t dst) > { > struct ifnet *ifp = &sc->sc_if; > + struct veb_ports *pm; > + struct veb_port **ps; > struct veb_port *tp; > struct ifnet *ifp0; > struct mbuf *m; > + unsigned int i; > > #if NPF > 0 > /* > @@ -873,7 +959,17 @@ veb_broadcast(struct veb_softc *sc, stru > m0->m_pkthdr.len); > > smr_read_enter(); > - SMR_TAILQ_FOREACH(tp, &sc->sc_ports.l_list, p_entry) { > + pm = SMR_PTR_GET(&sc->sc_ports); > + if (__predict_true(pm != NULL)) > + refcnt_take(&pm->m_refs); > + smr_read_leave(); > + if (__predict_false(pm == NULL)) > + goto done; > + > + ps = veb_ports_array(pm); > + for (i = 0; i < pm->m_count; i++) { > + tp = ps[i]; > + > if (rp == tp || (rp->p_protected & tp->p_protected)) { > /* > * don't let Ethernet packets hairpin or > @@ -906,8 +1002,9 @@ veb_broadcast(struct veb_softc *sc, stru > > (*tp->p_enqueue)(ifp0, m); /* XXX count error */ > } > - smr_read_leave(); > + refcnt_rele_wake(&pm->m_refs); > > +done: > m_freem(m0); > } > > @@ -1241,12 +1338,99 @@ veb_ioctl(struct ifnet *ifp, u_long cmd, > return (error); > } > > +static struct veb_ports * > +veb_ports_insert(struct veb_ports *om, struct veb_port *p) > +{ > + struct veb_ports *nm; > + struct veb_port **nps, **ops; > + unsigned int ocount = om != NULL ? om->m_count : 0; > + unsigned int ncount = ocount + 1; > + unsigned int i; > + > + nm = malloc(veb_ports_size(ncount), M_DEVBUF, M_WAITOK|M_ZERO); > + > + refcnt_init(&nm->m_refs); > + nm->m_count = ncount; > + > + nps = veb_ports_array(nm); > + > + if (om != NULL) { > + ops = veb_ports_array(om); > + for (i = 0; i < ocount; i++) { > + struct veb_port *op = ops[i]; > + refcnt_take(&op->p_refs); > + nps[i] = op; > + } > + } else > + i = 0; > + > + refcnt_take(&p->p_refs); > + nps[i] = p; > + > + return (nm); > +} > + > +static struct veb_ports * > +veb_ports_remove(struct veb_ports *om, struct veb_port *p) > +{ > + struct veb_ports *nm; > + struct veb_port **nps, **ops; > + unsigned int ocount = om->m_count; > + unsigned int ncount = ocount - 1; > + unsigned int i, j; > + > + if (ncount == 0) > + return (NULL); > + > + nm = malloc(veb_ports_size(ncount), M_DEVBUF, M_WAITOK|M_ZERO); > + > + refcnt_init(&nm->m_refs); > + nm->m_count = ncount; > + > + nps = veb_ports_array(nm); > + j = 0; > + > + ops = veb_ports_array(om); > + for (i = 0; i < ocount; i++) { > + struct veb_port *op = ops[i]; > + if (op == p) > + continue; > + > + refcnt_take(&op->p_refs); > + nps[j++] = op; > + } > + KASSERT(j == ncount); > + > + return (nm); > +} > + > +static inline void > +veb_ports_free(struct veb_ports *m) > +{ > + free(m, M_DEVBUF, veb_ports_size(m->m_count)); > +} > + > +static void > +veb_ports_destroy(struct veb_ports *m) > +{ > + struct veb_port **ps = veb_ports_array(m); > + unsigned int i; > + > + for (i = 0; i < m->m_count; i++) { > + struct veb_port *p = ps[i]; > + refcnt_rele_wake(&p->p_refs); > + } > + > + veb_ports_free(m); > +} > + > static int > veb_add_port(struct veb_softc *sc, const struct ifbreq *req, unsigned int > span) > { > struct ifnet *ifp = &sc->sc_if; > struct ifnet *ifp0; > - struct veb_ports *port_list; > + struct veb_ports **ports_ptr; > + struct veb_ports *om, *nm; > struct veb_port *p; > int isvport; > int error; > @@ -1294,7 +1478,7 @@ veb_add_port(struct veb_softc *sc, const > p->p_output = ifp0->if_output; > > if (span) { > - port_list = &sc->sc_spans; > + ports_ptr = &sc->sc_spans; > > if (isvport) { > error = EPROTONOSUPPORT; > @@ -1304,7 +1488,7 @@ veb_add_port(struct veb_softc *sc, const > p->p_brport.eb_input = veb_span_input; > p->p_bif_flags = IFBIF_SPAN; > } else { > - port_list = &sc->sc_ports; > + ports_ptr = &sc->sc_ports; > > error = ifpromisc(ifp0, 1); > if (error != 0) > @@ -1318,6 +1502,9 @@ veb_add_port(struct veb_softc *sc, const > p->p_brport.eb_port_take = veb_eb_brport_take; > p->p_brport.eb_port_rele = veb_eb_brport_rele; > > + om = SMR_PTR_GET_LOCKED(ports_ptr); > + nm = veb_ports_insert(om, p); > + > /* this might have changed if we slept for malloc or ifpromisc */ > error = ether_brport_isset(ifp0); > if (error != 0) > @@ -1332,8 +1519,7 @@ veb_add_port(struct veb_softc *sc, const > p->p_brport.eb_port = p; > > /* commit */ > - SMR_TAILQ_INSERT_TAIL_LOCKED(&port_list->l_list, p, p_entry); > - port_list->l_count++; > + SMR_PTR_SET_LOCKED(ports_ptr, nm); > > ether_brport_set(ifp0, &p->p_brport); > if (!isvport) { /* vport is special */ > @@ -1343,6 +1529,13 @@ veb_add_port(struct veb_softc *sc, const > > veb_p_linkch(p); > > + /* clean up the old veb_ports map */ > + smr_barrier(); > + if (om != NULL) { > + refcnt_finalize(&om->m_refs, "vebports"); > + veb_ports_destroy(om); > + } > + > return (0); > > unpromisc: > @@ -1358,12 +1551,19 @@ put: > static struct veb_port * > veb_trunkport(struct veb_softc *sc, const char *name, unsigned int span) > { > - struct veb_ports *port_list; > + struct veb_ports *m; > + struct veb_port **ps; > struct veb_port *p; > + unsigned int i; > + > + m = SMR_PTR_GET_LOCKED(span ? &sc->sc_spans : &sc->sc_ports); > + if (m == NULL) > + return (NULL); > > - port_list = span ? &sc->sc_spans : &sc->sc_ports; > + ps = veb_ports_array(m); > + for (i = 0; i < m->m_count; i++) { > + p = ps[i]; > > - SMR_TAILQ_FOREACH_LOCKED(p, &port_list->l_list, p_entry) { > if (strcmp(p->p_ifp0->if_xname, name) == 0) > return (p); > } > @@ -1381,7 +1581,7 @@ veb_del_port(struct veb_softc *sc, const > if (p == NULL) > return (EINVAL); > > - veb_p_dtor(sc, p, "del"); > + veb_p_dtor(sc, p); > > return (0); > } > @@ -1389,20 +1589,31 @@ veb_del_port(struct veb_softc *sc, const > static struct veb_port * > veb_port_get(struct veb_softc *sc, const char *name) > { > + struct veb_ports *m; > + struct veb_port **ps; > struct veb_port *p; > + struct ifnet *ifp0; > + unsigned int i; > > NET_ASSERT_LOCKED(); > > - SMR_TAILQ_FOREACH_LOCKED(p, &sc->sc_ports.l_list, p_entry) { > - struct ifnet *ifp0 = p->p_ifp0; > + m = SMR_PTR_GET_LOCKED(&sc->sc_ports); > + if (m == NULL) > + return (NULL); > + > + ps = veb_ports_array(m); > + for (i = 0; i < m->m_count; i++) { > + p = ps[i]; > + ifp0 = p->p_ifp0; > + > if (strncmp(ifp0->if_xname, name, > sizeof(ifp0->if_xname)) == 0) { > refcnt_take(&p->p_refs); > - break; > + return (p); > } > } > > - return (p); > + return (NULL); > } > > static void > @@ -1712,56 +1923,77 @@ static int > veb_port_list(struct veb_softc *sc, struct ifbifconf *bifc) > { > struct ifnet *ifp = &sc->sc_if; > + struct veb_ports *m; > + struct veb_port **ps; > struct veb_port *p; > struct ifnet *ifp0; > struct ifbreq breq; > int n = 0, error = 0; > + unsigned int i; > > NET_ASSERT_LOCKED(); > > if (bifc->ifbic_len == 0) { > - n = sc->sc_ports.l_count + sc->sc_spans.l_count; > + m = SMR_PTR_GET_LOCKED(&sc->sc_ports); > + if (m != NULL) > + n += m->m_count; > + m = SMR_PTR_GET_LOCKED(&sc->sc_spans); > + if (m != NULL) > + n += m->m_count; > goto done; > } > > - SMR_TAILQ_FOREACH_LOCKED(p, &sc->sc_ports.l_list, p_entry) { > - if (bifc->ifbic_len < sizeof(breq)) > - break; > + m = SMR_PTR_GET_LOCKED(&sc->sc_ports); > + if (m != NULL) { > + ps = veb_ports_array(m); > + for (i = 0; i < m->m_count; i++) { > + if (bifc->ifbic_len < sizeof(breq)) > + break; > > - memset(&breq, 0, sizeof(breq)); > + p = ps[i]; > > - ifp0 = p->p_ifp0; > + memset(&breq, 0, sizeof(breq)); > + > + ifp0 = p->p_ifp0; > > - strlcpy(breq.ifbr_name, ifp->if_xname, IFNAMSIZ); > - strlcpy(breq.ifbr_ifsname, ifp0->if_xname, IFNAMSIZ); > + strlcpy(breq.ifbr_name, ifp->if_xname, IFNAMSIZ); > + strlcpy(breq.ifbr_ifsname, ifp0->if_xname, IFNAMSIZ); > > - breq.ifbr_ifsflags = p->p_bif_flags; > - breq.ifbr_portno = ifp0->if_index; > - breq.ifbr_protected = p->p_protected; > - if ((error = copyout(&breq, bifc->ifbic_req + n, > - sizeof(breq))) != 0) > - goto done; > + breq.ifbr_ifsflags = p->p_bif_flags; > + breq.ifbr_portno = ifp0->if_index; > + breq.ifbr_protected = p->p_protected; > + if ((error = copyout(&breq, bifc->ifbic_req + n, > + sizeof(breq))) != 0) > + goto done; > > - bifc->ifbic_len -= sizeof(breq); > - n++; > + bifc->ifbic_len -= sizeof(breq); > + n++; > + } > } > > - SMR_TAILQ_FOREACH_LOCKED(p, &sc->sc_spans.l_list, p_entry) { > - if (bifc->ifbic_len < sizeof(breq)) > - break; > + m = SMR_PTR_GET_LOCKED(&sc->sc_spans); > + if (m != NULL) { > + ps = veb_ports_array(m); > + for (i = 0; i < m->m_count; i++) { > + if (bifc->ifbic_len < sizeof(breq)) > + break; > > - memset(&breq, 0, sizeof(breq)); > + p = ps[i]; > > - strlcpy(breq.ifbr_name, ifp->if_xname, IFNAMSIZ); > - strlcpy(breq.ifbr_ifsname, p->p_ifp0->if_xname, IFNAMSIZ); > + memset(&breq, 0, sizeof(breq)); > > - breq.ifbr_ifsflags = p->p_bif_flags; > - if ((error = copyout(&breq, bifc->ifbic_req + n, > - sizeof(breq))) != 0) > - goto done; > + strlcpy(breq.ifbr_name, ifp->if_xname, IFNAMSIZ); > + strlcpy(breq.ifbr_ifsname, p->p_ifp0->if_xname, > + IFNAMSIZ); > > - bifc->ifbic_len -= sizeof(breq); > - n++; > + breq.ifbr_ifsflags = p->p_bif_flags; > + if ((error = copyout(&breq, bifc->ifbic_req + n, > + sizeof(breq))) != 0) > + goto done; > + > + bifc->ifbic_len -= sizeof(breq); > + n++; > + } > } > > done: > @@ -1904,46 +2136,67 @@ veb_p_output(struct ifnet *ifp0, struct > return ((*p_output)(ifp0, m, dst, rt)); > } > > +/* > + * there must be an smr_barrier after ether_brport_clr() and before > + * veb_port is freed in veb_p_fini() > + */ > + > static void > -veb_p_dtor(struct veb_softc *sc, struct veb_port *p, const char *op) > +veb_p_unlink(struct veb_softc *sc, struct veb_port *p) > { > struct ifnet *ifp = &sc->sc_if; > struct ifnet *ifp0 = p->p_ifp0; > - struct veb_ports *port_list; > - > - DPRINTF(sc, "%s %s: destroying port\n", > - ifp->if_xname, ifp0->if_xname); > > ifp0->if_ioctl = p->p_ioctl; > ifp0->if_output = p->p_output; > > - ether_brport_clr(ifp0); > + ether_brport_clr(ifp0); /* needs an smr_barrier */ > > if_detachhook_del(ifp0, &p->p_dtask); > if_linkstatehook_del(ifp0, &p->p_ltask); > > - if (ISSET(p->p_bif_flags, IFBIF_SPAN)) { > - port_list = &sc->sc_spans; > - } else { > + if (!ISSET(p->p_bif_flags, IFBIF_SPAN)) { > if (ifpromisc(ifp0, 0) != 0) { > log(LOG_WARNING, "%s %s: unable to disable promisc\n", > ifp->if_xname, ifp0->if_xname); > } > > etherbridge_detach_port(&sc->sc_eb, p); > - > - port_list = &sc->sc_ports; > } > - SMR_TAILQ_REMOVE_LOCKED(&port_list->l_list, p, p_entry); > - port_list->l_count--; > +} > > - refcnt_finalize(&p->p_refs, "vebpdtor"); > - smr_barrier(); > +static void > +veb_p_fini(struct veb_port *p) > +{ > + struct ifnet *ifp0 = p->p_ifp0; > > + refcnt_finalize(&p->p_refs, "vebpdtor"); > veb_rule_list_free(TAILQ_FIRST(&p->p_vrl)); > > if_put(ifp0); > - free(p, M_DEVBUF, sizeof(*p)); > + free(p, M_DEVBUF, sizeof(*p)); /* hope you didn't forget smr_barrier */ > +} > + > +static void > +veb_p_dtor(struct veb_softc *sc, struct veb_port *p) > +{ > + struct veb_ports **ports_ptr; > + struct veb_ports *om, *nm; > + > + ports_ptr = ISSET(p->p_bif_flags, IFBIF_SPAN) ? > + &sc->sc_spans : &sc->sc_ports; > + > + om = SMR_PTR_GET_LOCKED(ports_ptr); > + nm = veb_ports_remove(om, p); > + SMR_PTR_SET_LOCKED(ports_ptr, nm); > + > + veb_p_unlink(sc, p); > + > + smr_barrier(); > + refcnt_finalize(&om->m_refs, "vebports"); > + veb_ports_destroy(om); > + > + veb_p_fini(p); > } > > static void > @@ -1952,7 +2205,7 @@ veb_p_detach(void *arg) > struct veb_port *p = arg; > struct veb_softc *sc = p->p_veb; > > - veb_p_dtor(sc, p, "detach"); > + veb_p_dtor(sc, p); > > NET_ASSERT_LOCKED(); > } -- :wq Claudio