On Mon, May 29, 2017 at 03:33:35PM +0000, Visa Hankala wrote:
> Currently, access to pflow's sc_outputqueue is not serialized properly.
> The producer has the NET_LOCK(), while the consumer does not.
> mpi@ suggested using mbuf_queue to solve the issue.

mpi@ commented that the pflow output task should be scheduled only
if mq_enqueue succeeds.

Another issue is that mbuf_queue has a size limit. If a lot of flows
get exported at once, the queue might become full, dropping pflow data.
To make this less likely, I tweaked the limit from 256 (IFQ_MAXLEN) to
8192. Of course, packet loss can happen in many other places as well.

Updated patch below. OK?

Index: net/if_pflow.c
===================================================================
RCS file: src/sys/net/if_pflow.c,v
retrieving revision 1.78
diff -u -p -r1.78 if_pflow.c
--- net/if_pflow.c      27 May 2017 21:44:22 -0000      1.78
+++ net/if_pflow.c      30 May 2017 12:40:26 -0000
@@ -132,7 +132,7 @@ pflow_output_process(void *arg)
        struct mbuf *m;
 
        KERNEL_LOCK();
-       while ((m = ml_dequeue(&sc->sc_outputqueue)) != NULL) {
+       while ((m = mq_dequeue(&sc->sc_outputqueue)) != NULL) {
                pflow_sendout_mbuf(sc, m);
        }
        KERNEL_UNLOCK();
@@ -256,7 +256,7 @@ pflow_clone_create(struct if_clone *ifc,
        ifp->if_hdrlen = PFLOW_HDRLEN;
        ifp->if_flags = IFF_UP;
        ifp->if_flags &= ~IFF_RUNNING;  /* not running, need receiver */
-       ml_init(&pflowif->sc_outputqueue);
+       mq_init(&pflowif->sc_outputqueue, 8192, IPL_SOFTNET);
        pflow_setmtu(pflowif, ETHERMTU);
        pflow_init_timeouts(pflowif);
        if_attach(ifp);
@@ -288,7 +288,7 @@ pflow_clone_destroy(struct ifnet *ifp)
                timeout_del(&sc->sc_tmo_tmpl);
        pflow_flush(sc);
        task_del(softnettq, &sc->sc_outputtask);
-       ml_purge(&sc->sc_outputqueue);
+       mq_purge(&sc->sc_outputqueue);
        m_freem(sc->send_nam);
        if (sc->so != NULL) {
                error = soclose(sc->so);
@@ -1089,8 +1089,8 @@ pflow_sendout_v5(struct pflow_softc *sc)
        getnanotime(&tv);
        h->time_sec = htonl(tv.tv_sec);                 /* XXX 2038 */
        h->time_nanosec = htonl(tv.tv_nsec);
-       ml_enqueue(&sc->sc_outputqueue, m);
-       task_add(softnettq, &sc->sc_outputtask);
+       if (mq_enqueue(&sc->sc_outputqueue, m) == 0)
+               task_add(softnettq, &sc->sc_outputtask);
        return (0);
 }
 
@@ -1151,8 +1151,8 @@ pflow_sendout_ipfix(struct pflow_softc *
        h10->flow_sequence = htonl(sc->sc_sequence);
        sc->sc_sequence += count;
        h10->observation_dom = htonl(PFLOW_ENGINE_TYPE);
-       ml_enqueue(&sc->sc_outputqueue, m);
-       task_add(softnettq, &sc->sc_outputtask);
+       if (mq_enqueue(&sc->sc_outputqueue, m) == 0)
+               task_add(softnettq, &sc->sc_outputtask);
        return (0);
 }
 
@@ -1193,8 +1193,8 @@ pflow_sendout_ipfix_tmpl(struct pflow_so
        h10->observation_dom = htonl(PFLOW_ENGINE_TYPE);
 
        timeout_add_sec(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT);
-       ml_enqueue(&sc->sc_outputqueue, m);
-       task_add(softnettq, &sc->sc_outputtask);
+       if (mq_enqueue(&sc->sc_outputqueue, m) == 0)
+               task_add(softnettq, &sc->sc_outputtask);
        return (0);
 }
 
Index: net/if_pflow.h
===================================================================
RCS file: src/sys/net/if_pflow.h,v
retrieving revision 1.16
diff -u -p -r1.16 if_pflow.h
--- net/if_pflow.h      27 May 2017 21:06:06 -0000      1.16
+++ net/if_pflow.h      30 May 2017 12:40:26 -0000
@@ -184,7 +184,7 @@ struct pflow_softc {
        struct timeout           sc_tmo;
        struct timeout           sc_tmo6;
        struct timeout           sc_tmo_tmpl;
-       struct mbuf_list         sc_outputqueue;
+       struct mbuf_queue        sc_outputqueue;
        struct task              sc_outputtask;
        struct socket           *so;
        struct mbuf             *send_nam;

Reply via email to