Author: gnn
Date: Wed Jan 12 20:44:11 2011
New Revision: 217329
URL: http://svn.freebsd.org/changeset/base/217329

Log:
  MFC: 215207
  
  Add a queue to hold packets while we await an ARP reply.
  
  When a fast machine first brings up some non-TCP networking program
  it is quite possible that we will drop packets because only one
  packet can be held per ARP entry.  This leads to packets
  being missed when a program starts or restarts if the ARP data is
  not currently in the ARP cache.
  
  This code adds a new sysctl, net.link.ether.inet.maxhold, which defines
  a system wide maximum number of packets to be held in each ARP entry.
  Up to maxhold packets are queued until an ARP reply is received or
  the ARP times out.  The default setting is the old value of 1
  which has been part of the BSD networking code since time
  immemorial.
  
  Expose the time we hold an incomplete ARP entry by adding
  the sysctl net.link.ether.inet.wait, which defaults to 20
  seconds, the value used when the new ARP code was added.
  
  Reviewed by:  bz, rpaulo

Added:
  stable/8/tools/regression/netinet/arphold/
     - copied from r215207, head/tools/regression/netinet/arphold/
Modified:
  stable/8/sys/net/if_llatbl.c
  stable/8/sys/net/if_llatbl.h
  stable/8/sys/netinet/if_ether.c
  stable/8/sys/netinet/in.c
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/tools/regression/netinet/   (props changed)

Modified: stable/8/sys/net/if_llatbl.c
==============================================================================
--- stable/8/sys/net/if_llatbl.c        Wed Jan 12 20:38:55 2011        (r217328)
+++ stable/8/sys/net/if_llatbl.c        Wed Jan 12 20:44:11 2011        (r217329)
@@ -100,18 +100,34 @@ done:
  * This function is called by the timer functions
  * such as arptimer() and nd6_llinfo_timer(), and
  * the caller does the locking.
+ *
+ * Returns the number of held packets, if any, that were dropped.
  */
-void
+size_t
 llentry_free(struct llentry *lle)
 {
-       
+       size_t pkts_dropped;
+       struct mbuf *next;
+
+       pkts_dropped = 0;
        LLE_WLOCK_ASSERT(lle);
        LIST_REMOVE(lle, lle_next);
 
-       if (lle->la_hold != NULL)
+       while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) {
+               next = lle->la_hold->m_nextpkt;
                m_freem(lle->la_hold);
+               lle->la_hold = next;
+               lle->la_numheld--;
+               pkts_dropped++;
+       }
+
+       KASSERT(lle->la_numheld == 0, 
+               ("%s: la_numheld %d > 0, pkts_droped %ld", __func__, 
+                lle->la_numheld, pkts_dropped));
 
        LLE_FREE_LOCKED(lle);
+
+       return (pkts_dropped);
 }
 
 /*
@@ -412,6 +428,7 @@ llatbl_lle_show(struct llentry_sa *la)
        db_printf(" lle_tbl=%p\n", lle->lle_tbl);
        db_printf(" lle_head=%p\n", lle->lle_head);
        db_printf(" la_hold=%p\n", lle->la_hold);
+       db_printf(" la_numheld=%d\n", lle->la_numheld);
        db_printf(" la_expire=%ju\n", (uintmax_t)lle->la_expire);
        db_printf(" la_flags=0x%04x\n", lle->la_flags);
        db_printf(" la_asked=%u\n", lle->la_asked);

Modified: stable/8/sys/net/if_llatbl.h
==============================================================================
--- stable/8/sys/net/if_llatbl.h        Wed Jan 12 20:38:55 2011        (r217328)
+++ stable/8/sys/net/if_llatbl.h        Wed Jan 12 20:44:11 2011        (r217329)
@@ -58,6 +58,7 @@ struct llentry {
        struct lltable           *lle_tbl;
        struct llentries         *lle_head;
        struct mbuf              *la_hold;
+       int                      la_numheld;  /* # of packets currently held */
        time_t                   la_expire;
        uint16_t                 la_flags;    
        uint16_t                 la_asked;
@@ -184,7 +185,7 @@ void                lltable_drain(int);
 #endif
 int            lltable_sysctl_dumparp(int, struct sysctl_req *);
 
-void           llentry_free(struct llentry *);
+size_t         llentry_free(struct llentry *);
 int            llentry_update(struct llentry **, struct lltable *,
                        struct sockaddr_storage *, struct ifnet *);
 

Modified: stable/8/sys/netinet/if_ether.c
==============================================================================
--- stable/8/sys/netinet/if_ether.c     Wed Jan 12 20:38:55 2011        (r217328)
+++ stable/8/sys/netinet/if_ether.c     Wed Jan 12 20:44:11 2011        (r217329)
@@ -89,13 +89,16 @@ VNET_DEFINE(int, useloopback) = 1;  /* us
 static VNET_DEFINE(int, arp_proxyall) = 0;
 static VNET_DEFINE(int, arpt_down) = 20;      /* keep incomplete entries for
                                               * 20 seconds */
-static VNET_DEFINE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
+VNET_DEFINE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
+
+static VNET_DEFINE(int, arp_maxhold) = 1;
 
 #define        V_arpt_keep             VNET(arpt_keep)
 #define        V_arpt_down             VNET(arpt_down)
 #define        V_arp_maxtries          VNET(arp_maxtries)
 #define        V_arp_proxyall          VNET(arp_proxyall)
 #define        V_arpstat               VNET(arpstat)
+#define        V_arp_maxhold           VNET(arp_maxhold)
 
 SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW,
        &VNET_NAME(arpt_keep), 0,
@@ -109,9 +112,15 @@ SYSCTL_VNET_INT(_net_link_ether_inet, OI
 SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW,
        &VNET_NAME(arp_proxyall), 0,
        "Enable proxy ARP for all suitable requests");
+SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_RW,
+       &VNET_NAME(arpt_down), 0,
+       "Incomplete ARP entry lifetime in seconds");
 SYSCTL_VNET_STRUCT(_net_link_ether_arp, OID_AUTO, stats, CTLFLAG_RW,
        &VNET_NAME(arpstat), arpstat,
        "ARP statistics (struct arpstat, net/if_arp.h)");
+SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_RW,
+       &VNET_NAME(arp_maxhold), 0, 
+       "Number of packets to hold per ARP entry");
 
 static void    arp_init(void);
 void           arprequest(struct ifnet *,
@@ -160,6 +169,7 @@ arptimer(void *arg)
 {
        struct ifnet *ifp;
        struct llentry   *lle;
+       int pkts_dropped;
 
        KASSERT(arg != NULL, ("%s: arg NULL", __func__));
        lle = (struct llentry *)arg;
@@ -174,7 +184,8 @@ arptimer(void *arg)
                    callout_active(&lle->la_timer)) {
                        callout_stop(&lle->la_timer);
                        LLE_REMREF(lle);
-                       (void) llentry_free(lle);
+                       pkts_dropped = llentry_free(lle);
+                       ARPSTAT_ADD(dropped, pkts_dropped);
                        ARPSTAT_INC(timeouts);
                } 
 #ifdef DIAGNOSTIC
@@ -273,6 +284,8 @@ arpresolve(struct ifnet *ifp, struct rte
 {
        struct llentry *la = 0;
        u_int flags = 0;
+       struct mbuf *curr = NULL;
+       struct mbuf *next = NULL;
        int error, renew;
 
        *lle = NULL;
@@ -348,15 +361,28 @@ retry:
        }
        /*
         * There is an arptab entry, but no ethernet address
-        * response yet.  Replace the held mbuf with this
-        * latest one.
+        * response yet.  Add the mbuf to the list, dropping
+        * the oldest packet if we have exceeded the system
+        * setting.
         */
        if (m != NULL) {
+               if (la->la_numheld >= V_arp_maxhold) {
+                       if (la->la_hold != NULL) {
+                               next = la->la_hold->m_nextpkt;
+                               m_freem(la->la_hold);
+                               la->la_hold = next;
+                               la->la_numheld--;
+                               ARPSTAT_INC(dropped);
+                       }
+               } 
                if (la->la_hold != NULL) {
-                       m_freem(la->la_hold);
-                       ARPSTAT_INC(dropped);
-               }
-               la->la_hold = m;
+                       curr = la->la_hold;
+                       while (curr->m_nextpkt != NULL)
+                               curr = curr->m_nextpkt;
+                       curr->m_nextpkt = m;
+               } else 
+                       la->la_hold = m;
+               la->la_numheld++;
                if (renew == 0 && (flags & LLE_EXCLUSIVE)) {
                        flags &= ~LLE_EXCLUSIVE;
                        LLE_DOWNGRADE(la);
@@ -483,7 +509,6 @@ in_arpinput(struct mbuf *m)
        struct rtentry *rt;
        struct ifaddr *ifa;
        struct in_ifaddr *ia;
-       struct mbuf *hold;
        struct sockaddr sa;
        struct in_addr isaddr, itaddr, myaddr;
        u_int8_t *enaddr = NULL;
@@ -696,15 +721,29 @@ match:
                }
                la->la_asked = 0;
                la->la_preempt = V_arp_maxtries;
-               hold = la->la_hold;
-               if (hold != NULL) {
+               /* 
+                * The packets are all freed within the call to the output
+                * routine.
+                *
+                * NB: The lock MUST be released before the call to the
+                * output routine.
+                */
+               if (la->la_hold != NULL) {
+                       struct mbuf *m_hold, *m_hold_next;
+
+                       m_hold = la->la_hold;
                        la->la_hold = NULL;
+                       la->la_numheld = 0;
                        memcpy(&sa, L3_ADDR(la), sizeof(sa));
-               }
-               LLE_WUNLOCK(la);
-               if (hold != NULL)
-                       (*ifp->if_output)(ifp, hold, &sa, NULL);
-       }
+                       LLE_WUNLOCK(la);
+                       for (; m_hold != NULL; m_hold = m_hold_next) {
+                               m_hold_next = m_hold->m_nextpkt;
+                               m_hold->m_nextpkt = NULL;
+                               (*ifp->if_output)(ifp, m_hold, &sa, NULL);
+                       }
+               } else
+                       LLE_WUNLOCK(la);
+       } /* end of FIB loop */
 reply:
        if (op != ARPOP_REQUEST)
                goto drop;

Modified: stable/8/sys/netinet/in.c
==============================================================================
--- stable/8/sys/netinet/in.c   Wed Jan 12 20:38:55 2011        (r217328)
+++ stable/8/sys/netinet/in.c   Wed Jan 12 20:44:11 2011        (r217329)
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 
 #include <net/if.h>
 #include <net/if_var.h>
+#include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_llatbl.h>
 #include <net/if_types.h>
@@ -89,6 +90,9 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, 
 VNET_DECLARE(struct inpcbinfo, ripcbinfo);
 #define        V_ripcbinfo                     VNET(ripcbinfo)
 
+VNET_DECLARE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
+#define        V_arpstat               VNET(arpstat)
+
 /*
  * Return 1 if an internet address is for a ``local'' host
  * (one to which we have a connection).  If subnetsarelocal
@@ -1363,6 +1367,7 @@ in_lltable_prefix_free(struct lltable *l
        const struct sockaddr_in *msk = (const struct sockaddr_in *)mask;
        struct llentry *lle, *next;
        register int i;
+       size_t pkts_dropped;
 
        for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
                LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
@@ -1375,7 +1380,8 @@ in_lltable_prefix_free(struct lltable *l
                                LLE_WLOCK(lle);
                                if (canceled)
                                        LLE_REMREF(lle);
-                               llentry_free(lle);
+                               pkts_dropped = llentry_free(lle);
+                               ARPSTAT_ADD(dropped, pkts_dropped);
                        }
                }
        }
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to