Author: kp
Date: Fri Nov  2 15:26:51 2018
New Revision: 340061
URL: https://svnweb.freebsd.org/changeset/base/340061

Log:
  pf: Split the fragment reassembly queue into smaller parts
  
  Remember 16 entry points based on the fragment offset.  Instead of
  a worst case of 8196 list traversals we now check a maximum of 512
  list entries or 16 array elements.
  
  Obtained from:        OpenBSD
  Differential Revision:        https://reviews.freebsd.org/D17733

Modified:
  head/sys/net/pfvar.h
  head/sys/netpfil/pf/pf_norm.c

Modified: head/sys/net/pfvar.h
==============================================================================
--- head/sys/net/pfvar.h        Fri Nov  2 15:23:57 2018        (r340060)
+++ head/sys/net/pfvar.h        Fri Nov  2 15:26:51 2018        (r340061)
@@ -1205,6 +1205,12 @@ struct pf_divert {
 #define PFR_KENTRY_HIWAT       200000  /* Number of table entries */
 
 /*
+ * Limit the length of the fragment queue traversal.  Remember
+ * search entry points based on the fragment offset.
+ */
+#define PF_FRAG_ENTRY_POINTS           16
+
+/*
  * ioctl parameter structures
  */
 

Modified: head/sys/netpfil/pf/pf_norm.c
==============================================================================
--- head/sys/netpfil/pf/pf_norm.c       Fri Nov  2 15:23:57 2018        
(r340060)
+++ head/sys/netpfil/pf/pf_norm.c       Fri Nov  2 15:26:51 2018        
(r340061)
@@ -87,6 +87,7 @@ struct pf_fragment {
 #define fr_af  fr_key.frc_af
 #define fr_proto       fr_key.frc_proto
 
+       struct pf_frent *fr_firstoff[PF_FRAG_ENTRY_POINTS];
        RB_ENTRY(pf_fragment) fr_entry;
        TAILQ_ENTRY(pf_fragment) frag_next;
        uint32_t        fr_timeout;
@@ -136,6 +137,13 @@ static struct pf_frent *pf_create_fragment(u_short *);
 static int     pf_frent_holes(struct pf_frent *frent);
 static struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key,
                    struct pf_frag_tree *tree);
+static inline int      pf_frent_index(struct pf_frent *);
+static void    pf_frent_insert(struct pf_fragment *,
+                           struct pf_frent *, struct pf_frent *);
+void                   pf_frent_remove(struct pf_fragment *,
+                           struct pf_frent *);
+struct pf_frent                *pf_frent_previous(struct pf_fragment *,
+                           struct pf_frent *);
 static struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *,
                    struct pf_frent *, u_short *);
 static struct mbuf *pf_join_fragment(struct pf_fragment *);
@@ -308,6 +316,7 @@ pf_remove_fragment(struct pf_fragment *frag)
 {
 
        PF_FRAG_ASSERT();
+       KASSERT(frag, ("frag != NULL"));
 
        RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag);
        TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
@@ -367,9 +376,150 @@ pf_frent_holes(struct pf_frent *frent)
        return holes;
 }
 
+static inline int
+pf_frent_index(struct pf_frent *frent)
+{
+       /*
+        * We have an array of 16 entry points to the queue.  A full size
+        * 65535 octet IP packet can have 8192 fragments.  So the queue
+        * traversal length is at most 512 and at most 16 entry points are
+        * checked.  We need 128 additional bytes on a 64 bit architecture.
+        */
+       CTASSERT(((u_int16_t)0xffff &~ 7) / (0x10000 / PF_FRAG_ENTRY_POINTS) ==
+           16 - 1);
+       CTASSERT(((u_int16_t)0xffff >> 3) / PF_FRAG_ENTRY_POINTS == 512 - 1);
+
+       return frent->fe_off / (0x10000 / PF_FRAG_ENTRY_POINTS);
+}
+
+static void
+pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent,
+    struct pf_frent *prev)
+{
+       int index;
+
+       if (prev == NULL) {
+               TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
+       } else {
+               KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off,
+                   ("overlapping fragment"));
+               TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
+       }
+
+       index = pf_frent_index(frent);
+       if (frag->fr_firstoff[index] == NULL) {
+               KASSERT(prev == NULL || pf_frent_index(prev) < index,
+                   ("prev == NULL || pf_frent_index(pref) < index"));
+               frag->fr_firstoff[index] = frent;
+       } else {
+               if (frent->fe_off < frag->fr_firstoff[index]->fe_off) {
+                       KASSERT(prev == NULL || pf_frent_index(prev) < index,
+                           ("prev == NULL || pf_frent_index(pref) < index"));
+                       frag->fr_firstoff[index] = frent;
+               } else {
+                       KASSERT(prev != NULL, ("prev != NULL"));
+                       KASSERT(pf_frent_index(prev) == index,
+                           ("pf_frent_index(prev) == index"));
+               }
+       }
+
+       frag->fr_holes += pf_frent_holes(frent);
+}
+
+void
+pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent)
+{
+       struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next);
+       struct pf_frent *next = TAILQ_NEXT(frent, fr_next);
+       int index;
+
+       frag->fr_holes -= pf_frent_holes(frent);
+
+       index = pf_frent_index(frent);
+       KASSERT(frag->fr_firstoff[index] != NULL, ("frent not found"));
+       if (frag->fr_firstoff[index]->fe_off == frent->fe_off) {
+               if (next == NULL) {
+                       frag->fr_firstoff[index] = NULL;
+               } else {
+                       KASSERT(frent->fe_off + frent->fe_len <= next->fe_off,
+                           ("overlapping fragment"));
+                       if (pf_frent_index(next) == index) {
+                               frag->fr_firstoff[index] = next;
+                       } else {
+                               frag->fr_firstoff[index] = NULL;
+                       }
+               }
+       } else {
+               KASSERT(frag->fr_firstoff[index]->fe_off < frent->fe_off,
+                   ("frag->fr_firstoff[index]->fe_off < frent->fe_off"));
+               KASSERT(prev != NULL, ("prev != NULL"));
+               KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off,
+                   ("overlapping fragment"));
+               KASSERT(pf_frent_index(prev) == index,
+                   ("pf_frent_index(prev) == index"));
+       }
+
+       TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
+}
+
+struct pf_frent *
+pf_frent_previous(struct pf_fragment *frag, struct pf_frent *frent)
+{
+       struct pf_frent *prev, *next;
+       int index;
+
+       /*
+        * If there are no fragments after frag, take the final one.  Assume
+        * that the global queue is not empty.
+        */
+       prev = TAILQ_LAST(&frag->fr_queue, pf_fragq);
+       KASSERT(prev != NULL, ("prev != NULL"));
+       if (prev->fe_off <= frent->fe_off)
+               return prev;
+       /*
+        * We want to find a fragment entry that is before frag, but still
+        * close to it.  Find the first fragment entry that is in the same
+        * entry point or in the first entry point after that.  As we have
+        * already checked that there are entries behind frag, this will
+        * succeed.
+        */
+       for (index = pf_frent_index(frent); index < PF_FRAG_ENTRY_POINTS;
+           index++) {
+               prev = frag->fr_firstoff[index];
+               if (prev != NULL)
+                       break;
+       }
+       KASSERT(prev != NULL, ("prev != NULL"));
+       /*
+        * In prev we may have a fragment from the same entry point that is
+        * before frent, or one that is just one position behind frent.
+        * In the latter case, we go back one step and have the predecessor.
+        * There may be none if the new fragment will be the first one.
+        */
+       if (prev->fe_off > frent->fe_off) {
+               prev = TAILQ_PREV(prev, pf_fragq, fr_next);
+               if (prev == NULL)
+                       return NULL;
+               KASSERT(prev->fe_off <= frent->fe_off,
+                   ("prev->fe_off <= frent->fe_off"));
+               return prev;
+       }
+       /*
+        * In prev is the first fragment of the entry point.  The offset
+        * of frag is behind it.  Find the closest previous fragment.
+        */
+       for (next = TAILQ_NEXT(prev, fr_next); next != NULL;
+           next = TAILQ_NEXT(next, fr_next)) {
+               if (next->fe_off > frent->fe_off)
+                       break;
+               prev = next;
+       }
+       return prev;
+}
+
 static struct pf_fragment *
 pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
-               u_short *reason)
+    u_short *reason)
 {
        struct pf_frent         *after, *next, *prev;
        struct pf_fragment      *frag;
@@ -416,6 +566,7 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct
                }
 
                *(struct pf_fragment_cmp *)frag = *key;
+               memset(frag->fr_firstoff, 0, sizeof(frag->fr_firstoff));
                frag->fr_timeout = time_uptime;
                frag->fr_maxlen = frent->fe_len;
                frag->fr_holes = 1;
@@ -425,8 +576,7 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct
                TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
 
                /* We do not have a previous fragment. */
-               TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
-               frag->fr_holes += pf_frent_holes(frent);
+               pf_frent_insert(frag, frent, NULL);
 
                return (frag);
        }
@@ -455,17 +605,15 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct
                        goto bad_fragment;
        }
 
-       /* Find a fragment after the current one. */
-       prev = NULL;
-       TAILQ_FOREACH(after, &frag->fr_queue, fr_next) {
-               if (after->fe_off > frent->fe_off)
-                       break;
-               prev = after;
+       /* Find neighbors for newly inserted fragment */
+       prev = pf_frent_previous(frag, frent);
+       if (prev == NULL) {
+               after = TAILQ_FIRST(&frag->fr_queue);
+               KASSERT(after != NULL, ("after != NULL"));
+       } else {
+               after = TAILQ_NEXT(prev, fr_next);
        }
 
-       KASSERT(prev != NULL || after != NULL,
-           ("prev != NULL || after != NULL"));
-
        if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
                uint16_t precut;
 
@@ -493,17 +641,12 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct
 
                /* This fragment is completely overlapped, lose it. */
                next = TAILQ_NEXT(after, fr_next);
-               frag->fr_holes -= pf_frent_holes(after);
+               pf_frent_remove(frag, after);
                m_freem(after->fe_m);
-               TAILQ_REMOVE(&frag->fr_queue, after, fr_next);
                uma_zfree(V_pf_frent_z, after);
        }
 
-       if (prev == NULL)
-               TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
-       else
-               TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
-       frag->fr_holes += pf_frent_holes(frent);
+       pf_frent_insert(frag, frent, prev);
 
        return (frag);
 
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to