On 27.11.2012 04:04, Alfred Perlstein wrote:
Author: alfred
Date: Tue Nov 27 03:04:24 2012
New Revision: 243594
URL: http://svnweb.freebsd.org/changeset/base/243594

Log:
   Auto size the tcbhashsize structure based on max sockets.

   While here, also make the code that enforces power-of-two more
   forgiving, instead of just resetting to 512, graciously round-down
   to the next lower power of two.

The porthash (also a parameter to in_pcbinfo_init()) should not be
scaled the same as tcbhashsize.  It's unlikely that anyone is going
to have more than a thousand listen ports.

The default tcbhashsize scaling factor of maxsockets / 4 is IMHO
excessive.  If someone is running at the limit he's going to up
maxsockets to double the target load.  Dividing by 8 is a better
balance for normal and well used large memory machines.
The hashsize calculation logic is done a bit in a roundabout way.

What do think of the attached patch to fix the porthash and to simplify
the hashsize logic?

--
Andre

Index: netinet/tcp_subr.c
===================================================================
--- netinet/tcp_subr.c  (revision 243631)
+++ netinet/tcp_subr.c  (working copy)
@@ -229,14 +229,17 @@
                    void *ip4hdr, const void *ip6hdr);

 /*
- * Target size of TCP PCB hash tables. Must be a power of two.
+ * Minimal size of TCP PCB hash tables. Must be a power of two.
  *
  * Note that this can be overridden by the kernel environment
  * variable net.inet.tcp.tcbhashsize
  */
-#ifndef TCBHASHSIZE
-#define TCBHASHSIZE    0
+#ifndef TCBMINHASHSIZE
+#define TCBMINHASHSIZE 512
 #endif
+#ifndef TCBPORTHASHSIZE
+#define TCBPORTHASHSIZE        512
+#endif

 /*
  * XXX
@@ -282,35 +285,10 @@
        return (0);
 }

-/*
- * Take a value and get the next power of 2 that doesn't overflow.
- * Used to size the tcp_inpcb hash buckets.
- */
-static int
-maketcp_hashsize(int size)
-{
-       int hashsize;
-
-       /*
-        * auto tune.
-        * get the next power of 2 higher than maxsockets.
-        */
-       hashsize = 1 << fls(size);
-       /* catch overflow, and just go one power of 2 smaller */
-       if (hashsize < size) {
-               hashsize = 1 << (fls(size) - 1);
-       }
-       return (hashsize);
-}
-
 void
 tcp_init(void)
 {
-       const char *tcbhash_tuneable;
-       int hashsize;

-       tcbhash_tuneable = "net.inet.tcp.tcbhashsize";
-
        if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN,
            &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
                printf("%s: WARNING: unable to register helper hook\n", 
__func__);
@@ -318,49 +296,17 @@
            &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 
0)
                printf("%s: WARNING: unable to register helper hook\n", 
__func__);

-       hashsize = TCBHASHSIZE;
-       TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
-       if (hashsize == 0) {
-               /*
-                * Auto tune the hash size based on maxsockets.
-                * A perfect hash would have a 1:1 mapping
-                * (hashsize = maxsockets) however it's been
-                * suggested that O(2) average is better.
-                */
-               hashsize = maketcp_hashsize(maxsockets / 4);
-               /*
-                * Our historical default is 512,
-                * do not autotune lower than this.
-                */
-               if (hashsize < 512)
-                       hashsize = 512;
-               if (bootverbose)
-                       printf("%s: %s auto tuned to %d\n", __func__,
-                           tcbhash_tuneable, hashsize);
+       tcp_tcbhashsize = 0x1 << fls((maxsockets / 8) - 1);
+       TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &tcp_tcbhashsize);
+       if (!powerof2(tcp_tcbhashsize) ||
+           tcp_tcbhashsize < TCBMINHASHSIZE) {
+               printf("WARNING: TCB hash size not a power of 2 or too 
small\n");
+               tcp_tcbhashsize = TCBMINHASHSIZE;
        }
-       /*
-        * We require a hashsize to be a power of two.
-        * Previously if it was not a power of two we would just reset it
-        * back to 512, which could be a nasty surprise if you did not notice
-        * the error message.
-        * Instead what we do is clip it to the closest power of two lower
-        * than the specified hash value.
-        */
-       if (!powerof2(hashsize)) {
-               int oldhashsize = hashsize;
+       in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, tcp_tcbhashsize,
+           TCBPORTHASHSIZE, "tcp_inpcb", tcp_inpcb_init, NULL,
+           UMA_ZONE_NOFREE, IPI_HASHFIELDS_4TUPLE);

-               hashsize = maketcp_hashsize(hashsize);
-               /* prevent absurdly low value */
-               if (hashsize < 16)
-                       hashsize = 16;
-               printf("%s: WARNING: TCB hash size not a power of 2, "
-                   "clipped from %d to %d.\n", __func__, oldhashsize,
-                   hashsize);
-       }
-       in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
-           "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE,
-           IPI_HASHFIELDS_4TUPLE);
-
        /*
         * These have to be type stable for the benefit of the timers.
         */
@@ -393,7 +339,6 @@
                tcp_rexmit_min = 1;
        tcp_rexmit_slop = TCPTV_CPU_VAR;
        tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
-       tcp_tcbhashsize = hashsize;

        TUNABLE_INT_FETCH("net.inet.tcp.soreceive_stream", 
&tcp_soreceive_stream);
        if (tcp_soreceive_stream) {
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to