On Wed, Mar 01, 2017 at 10:03:42PM +0100, Mike Belopuhov wrote:
> Priority queueing is the default policy in OpenBSD and it
> distributes outgoing packets in 8 lists by priority (0-7) with
> an aggregate queue depth set by the interface: pseudo interfaces
> use IFQ_MAXLEN defined equal to 256, hardware device drivers
> normally size it by their TX ring minus 1 (therefore 127, 255,
> 511 are common values).
> 
> Unless a producer generating packets with altered priority is
> used (such as "set prio" pf directive, PPPoE management frames,
> ping -T lowdelay, VLAN priority, and so on) all outgoing traffic
> is sent with a priority of 3 hitting the same list.
> 
> The drop policy used here is called tail drop because it drops
> the packet that we're trying to enqueue when there's no more
> space left on the queue.  The obvious downside is that if our
> queue is full of packets representing low priority traffic,
> trying to enqueue a packet with a higher priority will still
> result in a drop.  In my opinion, this defeats the purpose of
> priority queueing.
> 
> The diff below changes the policy to a head drop from the queue
> with the lowest priority than the packet we're trying to
> enqueue.  If there's no such queue (e.g. the default case where
> all traffic has priority of 3) only then the packet is dropped.
> This ensures that high priority traffic will almost always find
> the place on the queue and low priority bulk traffic gets a
> better chance at regulating its throughput.  By performing a
> head drop instead a tail drop we also drop the oldest packet on
> the queue.  This technique is akin to Active Queue Management
> algorithms.

i agree we should do this.

> 
> I'd like to stress again, that this doesn't change much for the
> default Ethernet-to-Ethernet case, but provides noticeable
> difference if different priorities are actually used, e.g. via
> pf.
> 
> More tests are always welcome.  This should go on top of the priq
> mbuf list diff, but here's a combined diff for convenience:
> http://gir.theapt.org/~mike/priq.diff
> 
> ---
>  sys/net/ifq.c | 23 ++++++++++++++++++++---
>  1 file changed, 20 insertions(+), 3 deletions(-)
> 
> diff --git sys/net/ifq.c sys/net/ifq.c
> index 896b373c454..f678c2b01fd 100644
> --- sys/net/ifq.c
> +++ sys/net/ifq.c
> @@ -407,18 +407,35 @@ priq_free(unsigned int idx, void *pq)
>  int
>  priq_enq(struct ifqueue *ifq, struct mbuf *m)
>  {
>       struct priq *pq;
>       struct mbuf_list *pl;
> -
> -     if (ifq_len(ifq) >= ifq->ifq_maxlen)
> -             return (ENOBUFS);
> +     unsigned int prio;
>  
>       pq = ifq->ifq_q;
>       KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);
>       pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
>  
> +     /* Find a lower priority queue to drop from */
> +     if (ifq_len(ifq) >= ifq->ifq_maxlen) {
> +             for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
> +                     pl = &pq->pq_lists[prio];
> +                     if (ml_len(pl) > 0) {
> +                             m_freem(ml_dequeue(pl));

the current code has been very careful not to free an mbuf while
holding the ifq mutex. i would prefer to keep it that way.

the least worst way to do that would be to return the mbuf to be
dropped for ifq_enqueue to free. this is complicated because of the
semantics that ifq_enqueue_try provides, but nothing uses that so
we can get rid of it to support this.

the diff below makes the ifq enq op return an mbuf to be freed, and
gets rid of ifq_enqueue_try. that in turn should let you return
this mbuf here rather than free it directly.

> +                             ifq->ifq_len--;
> +                             ifq->ifq_qdrops++;
> +                             break;
> +                     }
> +             }
> +             /*
> +              * There's no lower priority queue that we can
> +              * drop from so don't enqueue this one.
> +              */
> +             if (prio == m->m_pkthdr.pf.prio)
> +                     return (ENOBUFS);
> +     }
> +
>       ml_enqueue(pl, m);
>  
>       return (0);
>  }

Index: ifq.c
===================================================================
RCS file: /cvs/src/sys/net/ifq.c,v
retrieving revision 1.6
diff -u -p -r1.6 ifq.c
--- ifq.c       24 Jan 2017 03:57:35 -0000      1.6
+++ ifq.c       2 Mar 2017 00:31:53 -0000
@@ -29,7 +29,7 @@
  * priq glue
  */
 unsigned int    priq_idx(unsigned int, const struct mbuf *);
-int             priq_enq(struct ifqueue *, struct mbuf *);
+struct mbuf    *priq_enq(struct ifqueue *, struct mbuf *);
 struct mbuf    *priq_deq_begin(struct ifqueue *, void **);
 void            priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
 void            priq_purge(struct ifqueue *, struct mbuf_list *);
@@ -225,7 +225,8 @@ ifq_attach(struct ifqueue *ifq, const st
        ifq->ifq_q = newq;
 
        while ((m = ml_dequeue(&ml)) != NULL) {
-               if (ifq->ifq_ops->ifqop_enq(ifq, m) != 0) {
+               m = ifq->ifq_ops->ifqop_enq(ifq, m);
+               if (m != NULL) {
                        ifq->ifq_qdrops++;
                        ml_enqueue(&free_ml, m);
                } else
@@ -252,13 +253,14 @@ ifq_destroy(struct ifqueue *ifq)
 }
 
 int
-ifq_enqueue_try(struct ifqueue *ifq, struct mbuf *m)
+ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
 {
+       struct mbuf *dm;
        int rv;
 
        mtx_enter(&ifq->ifq_mtx);
-       rv = ifq->ifq_ops->ifqop_enq(ifq, m);
-       if (rv == 0) {
+       dm = ifq->ifq_ops->ifqop_enq(ifq, m);
+       if (dm == NULL) {
                ifq->ifq_len++;
 
                ifq->ifq_packets++;
@@ -269,19 +271,14 @@ ifq_enqueue_try(struct ifqueue *ifq, str
                ifq->ifq_qdrops++;
        mtx_leave(&ifq->ifq_mtx);
 
-       return (rv);
-}
-
-int
-ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
-{
-       int err;
-
-       err = ifq_enqueue_try(ifq, m);
-       if (err != 0)
-               m_freem(m);
+       if (dm == NULL)
+               rv = 0;
+       else {
+               m_freem(dm);
+               rv = ENOBUFS;
+       }
 
-       return (err);
+       return (rv);
 }
 
 struct mbuf *
@@ -403,14 +400,14 @@ priq_free(unsigned int idx, void *pq)
        free(pq, M_DEVBUF, sizeof(struct priq));
 }
 
-int
+struct mbuf *
 priq_enq(struct ifqueue *ifq, struct mbuf *m)
 {
        struct priq *pq;
        struct priq_list *pl;
 
        if (ifq_len(ifq) >= ifq->ifq_maxlen)
-               return (ENOBUFS);
+               return (m);
 
        pq = ifq->ifq_q;
        KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);
@@ -423,7 +420,7 @@ priq_enq(struct ifqueue *ifq, struct mbu
                pl->tail->m_nextpkt = m;
        pl->tail = m;
 
-       return (0);
+       return (NULL);
 }
 
 struct mbuf *
Index: ifq.h
===================================================================
RCS file: /cvs/src/sys/net/ifq.h,v
retrieving revision 1.9
diff -u -p -r1.9 ifq.h
--- ifq.h       24 Jan 2017 10:08:30 -0000      1.9
+++ ifq.h       2 Mar 2017 00:31:53 -0000
@@ -117,25 +117,21 @@ struct ifqueue {
  * the ifqueue. All the pending mbufs are removed from the previous
  * conditioner and requeued on the new.
  *
- * === ifq_enqueue() and ifq_enqueue_try()
+ * === ifq_enqueue()
  *
- * ifq_enqueue() and ifq_enqueue_try() attempt to fit an mbuf onto the
- * ifqueue. If the current traffic conditioner rejects the packet it
- * wont be queued and will be counted as a drop. ifq_enqueue() will
- * free the mbuf on the callers behalf if the packet is rejected.
- * ifq_enqueue_try() does not free the mbuf, allowing the caller to
- * reuse it.
+ * ifq_enqueue() attempts to fit an mbuf onto the ifqueue. The
+ * current traffic conditioner may drop a packet to make space on the
+ * queue.
  *
  * === ifq_start()
  *
- * Once a packet has been successfully queued with ifq_enqueue() or
- * ifq_enqueue_try(), the network card is notified with a call to
- * if_start(). If an interface is marked with IFXF_MPSAFE in its
- * if_xflags field, if_start() calls ifq_start() to dispatch the
- * interfaces start routine. Calls to ifq_start() run in the ifqueue
- * serialisation context, guaranteeing that only one instance of
- * ifp->if_start() will be running in the system at any point in time.
- *
+ * Once a packet has been successfully queued with ifq_enqueue(),
+ * the network card is notified with a call to if_start(). If an
+ * interface is marked with IFXF_MPSAFE in its if_xflags field,
+ * if_start() calls ifq_start() to dispatch the interfaces start
+ * routine. Calls to ifq_start() run in the ifqueue serialisation
+ * context, guaranteeing that only one instance of ifp->if_start()
+ * will be running in the system at any point in time.
  *
  * == Traffic conditioners API
  *
@@ -324,7 +320,7 @@ struct ifqueue {
 struct ifq_ops {
        unsigned int             (*ifqop_idx)(unsigned int,
                                    const struct mbuf *);
-       int                      (*ifqop_enq)(struct ifqueue *, struct mbuf *);
+       struct mbuf             *(*ifqop_enq)(struct ifqueue *, struct mbuf *);
        struct mbuf             *(*ifqop_deq_begin)(struct ifqueue *, void **);
        void                     (*ifqop_deq_commit)(struct ifqueue *,
                                    struct mbuf *, void *);
@@ -341,7 +337,6 @@ struct ifq_ops {
 void            ifq_init(struct ifqueue *, struct ifnet *, unsigned int);
 void            ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
 void            ifq_destroy(struct ifqueue *);
-int             ifq_enqueue_try(struct ifqueue *, struct mbuf *);
 int             ifq_enqueue(struct ifqueue *, struct mbuf *);
 struct mbuf    *ifq_deq_begin(struct ifqueue *);
 void            ifq_deq_commit(struct ifqueue *, struct mbuf *);
Index: hfsc.c
===================================================================
RCS file: /cvs/src/sys/net/hfsc.c,v
retrieving revision 1.35
diff -u -p -r1.35 hfsc.c
--- hfsc.c      24 Jan 2017 03:57:35 -0000      1.35
+++ hfsc.c      2 Mar 2017 00:31:53 -0000
@@ -260,7 +260,7 @@ struct pool hfsc_class_pl, hfsc_internal
  */
 
 unsigned int    hfsc_idx(unsigned int, const struct mbuf *);
-int             hfsc_enq(struct ifqueue *, struct mbuf *);
+struct mbuf    *hfsc_enq(struct ifqueue *, struct mbuf *);
 struct mbuf    *hfsc_deq_begin(struct ifqueue *, void **);
 void            hfsc_deq_commit(struct ifqueue *, struct mbuf *, void *);
 void            hfsc_purge(struct ifqueue *, struct mbuf_list *);
@@ -650,7 +650,7 @@ hfsc_nextclass(struct hfsc_class *cl)
        return (cl);
 }
 
-int
+struct mbuf *
 hfsc_enq(struct ifqueue *ifq, struct mbuf *m)
 {
        struct hfsc_if *hif = ifq->ifq_q;
@@ -660,14 +660,14 @@ hfsc_enq(struct ifqueue *ifq, struct mbu
            cl->cl_children != NULL) {
                cl = hif->hif_defaultclass;
                if (cl == NULL)
-                       return (ENOBUFS);
+                       return (m);
                cl->cl_pktattr = NULL;
        }
 
        if (ml_len(&cl->cl_q.q) >= cl->cl_q.qlimit) {
                /* drop occurred.  mbuf needs to be freed */
                PKTCNTR_INC(&cl->cl_stats.drop_cnt, m->m_pkthdr.len);
-               return (ENOBUFS);
+               return (m);
        }
 
        ml_enqueue(&cl->cl_q.q, m);
@@ -677,7 +677,7 @@ hfsc_enq(struct ifqueue *ifq, struct mbu
        if (ml_len(&cl->cl_q.q) == 1)
                hfsc_set_active(hif, cl, m->m_pkthdr.len);
 
-       return (0);
+       return (NULL);
 }
 
 struct mbuf *

Reply via email to