On Mon, May 29, 2017 at 03:33:35PM +0000, Visa Hankala wrote: > Currently, access to pflow's sc_outputqueue is not serialized properly. > The producer has the NET_LOCK(), while the consumer does not. > mpi@ suggested using mbuf_queue to solve the issue.
mpi@ commented that the pflow output task should be scheduled only if mq_enqueue succeeds. Another issue is that mbuf_queue has a size limit. If a lot of flows get exported at once, the queue might become full, dropping pflow data. To make this less likely, I tweaked the limit from 256 (IFQ_MAXLEN) to 8192. Of course, packet loss can happen in many other places as well. Updated patch below. OK? Index: net/if_pflow.c =================================================================== RCS file: src/sys/net/if_pflow.c,v retrieving revision 1.78 diff -u -p -r1.78 if_pflow.c --- net/if_pflow.c 27 May 2017 21:44:22 -0000 1.78 +++ net/if_pflow.c 30 May 2017 12:40:26 -0000 @@ -132,7 +132,7 @@ pflow_output_process(void *arg) struct mbuf *m; KERNEL_LOCK(); - while ((m = ml_dequeue(&sc->sc_outputqueue)) != NULL) { + while ((m = mq_dequeue(&sc->sc_outputqueue)) != NULL) { pflow_sendout_mbuf(sc, m); } KERNEL_UNLOCK(); @@ -256,7 +256,7 @@ pflow_clone_create(struct if_clone *ifc, ifp->if_hdrlen = PFLOW_HDRLEN; ifp->if_flags = IFF_UP; ifp->if_flags &= ~IFF_RUNNING; /* not running, need receiver */ - ml_init(&pflowif->sc_outputqueue); + mq_init(&pflowif->sc_outputqueue, 8192, IPL_SOFTNET); pflow_setmtu(pflowif, ETHERMTU); pflow_init_timeouts(pflowif); if_attach(ifp); @@ -288,7 +288,7 @@ pflow_clone_destroy(struct ifnet *ifp) timeout_del(&sc->sc_tmo_tmpl); pflow_flush(sc); task_del(softnettq, &sc->sc_outputtask); - ml_purge(&sc->sc_outputqueue); + mq_purge(&sc->sc_outputqueue); m_freem(sc->send_nam); if (sc->so != NULL) { error = soclose(sc->so); @@ -1089,8 +1089,8 @@ pflow_sendout_v5(struct pflow_softc *sc) getnanotime(&tv); h->time_sec = htonl(tv.tv_sec); /* XXX 2038 */ h->time_nanosec = htonl(tv.tv_nsec); - ml_enqueue(&sc->sc_outputqueue, m); - task_add(softnettq, &sc->sc_outputtask); + if (mq_enqueue(&sc->sc_outputqueue, m) == 0) + task_add(softnettq, &sc->sc_outputtask); return (0); } @@ -1151,8 +1151,8 @@ pflow_sendout_ipfix(struct pflow_softc * h10->flow_sequence = htonl(sc->sc_sequence); sc->sc_sequence += count; h10->observation_dom = htonl(PFLOW_ENGINE_TYPE); - ml_enqueue(&sc->sc_outputqueue, m); - task_add(softnettq, &sc->sc_outputtask); + if (mq_enqueue(&sc->sc_outputqueue, m) == 0) + task_add(softnettq, &sc->sc_outputtask); return (0); } @@ -1193,8 +1193,8 @@ pflow_sendout_ipfix_tmpl(struct pflow_so h10->observation_dom = htonl(PFLOW_ENGINE_TYPE); timeout_add_sec(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT); - ml_enqueue(&sc->sc_outputqueue, m); - task_add(softnettq, &sc->sc_outputtask); + if (mq_enqueue(&sc->sc_outputqueue, m) == 0) + task_add(softnettq, &sc->sc_outputtask); return (0); } Index: net/if_pflow.h =================================================================== RCS file: src/sys/net/if_pflow.h,v retrieving revision 1.16 diff -u -p -r1.16 if_pflow.h --- net/if_pflow.h 27 May 2017 21:06:06 -0000 1.16 +++ net/if_pflow.h 30 May 2017 12:40:26 -0000 @@ -184,7 +184,7 @@ struct pflow_softc { struct timeout sc_tmo; struct timeout sc_tmo6; struct timeout sc_tmo_tmpl; - struct mbuf_list sc_outputqueue; + struct mbuf_queue sc_outputqueue; struct task sc_outputtask; struct socket *so; struct mbuf *send_nam;