Author: rmacklem
Date: Sat Jun 17 22:24:19 2017
New Revision: 320062
URL: https://svnweb.freebsd.org/changeset/base/320062

Log:
  Make MAXBCACHEBUF a tunable called vfs.maxbcachebuf.
  
  By making MAXBCACHEBUF a tunable, it can be increased to allow for
  larger read/write data sizes for the NFS client.
  The tunable is limited to MAXPHYS, which is currently 128K.
  Making MAXPHYS a tunable or increasing its value is being discussed,
  since it would be nice to support a read/write data size of 1 Mbyte
  for the NFS client when mounting the Amazon EFS file service.
  
  Reviewed by:  kib
  MFC after:    2 weeks
  Relnotes:     yes
  Differential Revision:        https://reviews.freebsd.org/D10991

Modified:
  head/sys/fs/nfs/nfs_commonkrpc.c
  head/sys/fs/nfs/nfsport.h
  head/sys/fs/nfsclient/nfs_clrpcops.c
  head/sys/kern/vfs_bio.c
  head/sys/sys/param.h

Modified: head/sys/fs/nfs/nfs_commonkrpc.c
==============================================================================
--- head/sys/fs/nfs/nfs_commonkrpc.c    Sat Jun 17 20:33:11 2017        (r320061)
+++ head/sys/fs/nfs/nfs_commonkrpc.c    Sat Jun 17 22:24:19 2017        (r320062)
@@ -161,7 +161,7 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq
     struct ucred *cred, NFSPROC_T *p, int callback_retry_mult)
 {
        int rcvreserve, sndreserve;
-       int pktscale;
+       int pktscale, pktscalesav;
        struct sockaddr *saddr;
        struct ucred *origcred;
        CLIENT *client;
@@ -210,6 +210,7 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq
                pktscale = 2;
        if (pktscale > 64)
                pktscale = 64;
+       pktscalesav = pktscale;
        /*
         * soreserve() can fail if sb_max is too small, so shrink pktscale
         * and try again if there is an error.
@@ -228,8 +229,12 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq
                goto out;
        }
        do {
-           if (error != 0 && pktscale > 2)
+           if (error != 0 && pktscale > 2) {
+               if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
+                   pktscale == pktscalesav)
+                   printf("Consider increasing kern.ipc.maxsockbuf\n");
                pktscale--;
+           }
            if (nrp->nr_sotype == SOCK_DGRAM) {
                if (nmp != NULL) {
                        sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
@@ -243,15 +248,19 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq
                if (nrp->nr_sotype != SOCK_STREAM)
                        panic("nfscon sotype");
                if (nmp != NULL) {
-                       sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
+                       sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
                            sizeof (u_int32_t)) * pktscale;
-                       rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
+                       rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
                            sizeof (u_int32_t)) * pktscale;
                } else {
                        sndreserve = rcvreserve = 1024 * pktscale;
                }
            }
            error = soreserve(so, sndreserve, rcvreserve);
+           if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
+               pktscale <= 2)
+               printf("Must increase kern.ipc.maxsockbuf or reduce"
+                   " rsize, wsize\n");
        } while (error != 0 && pktscale > 2);
        soclose(so);
        if (error) {

Modified: head/sys/fs/nfs/nfsport.h
==============================================================================
--- head/sys/fs/nfs/nfsport.h   Sat Jun 17 20:33:11 2017        (r320061)
+++ head/sys/fs/nfs/nfsport.h   Sat Jun 17 22:24:19 2017        (r320062)
@@ -1016,7 +1016,7 @@ struct nfsreq {
 };
 
 #ifndef NFS_MAXBSIZE
-#define        NFS_MAXBSIZE    MAXBCACHEBUF
+#define        NFS_MAXBSIZE    (maxbcachebuf)
 #endif
 
 /*

Modified: head/sys/fs/nfsclient/nfs_clrpcops.c
==============================================================================
--- head/sys/fs/nfsclient/nfs_clrpcops.c        Sat Jun 17 20:33:11 2017        (r320061)
+++ head/sys/fs/nfsclient/nfs_clrpcops.c        Sat Jun 17 22:24:19 2017        (r320062)
@@ -4625,7 +4625,7 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsc
     struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
     NFSPROC_T *p)
 {
-       uint32_t crflags, *tl;
+       uint32_t crflags, maxval, *tl;
        struct nfsrv_descript nfsd;
        struct nfsrv_descript *nd = &nfsd;
        int error, irdcnt;
@@ -4643,8 +4643,8 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsc
        /* Fill in fore channel attributes. */
        NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
        *tl++ = 0;                              /* Header pad size */
-       *tl++ = txdr_unsigned(100000);          /* Max request size */
-       *tl++ = txdr_unsigned(100000);          /* Max response size */
+       *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */
+       *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */
        *tl++ = txdr_unsigned(4096);            /* Max response size cached */
        *tl++ = txdr_unsigned(20);              /* Max operations */
        *tl++ = txdr_unsigned(64);              /* Max slots */
@@ -4691,7 +4691,26 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsc
 
                /* Get the fore channel slot count. */
                NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
-               tl += 3;                /* Skip the other counts. */            
+               tl++;                   /* Skip the header pad size. */
+
+               /* Make sure nm_wsize is small enough. */
+               maxval = fxdr_unsigned(uint32_t, *tl++);
+               while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
+                       if (nmp->nm_wsize > 8096)
+                               nmp->nm_wsize /= 2;
+                       else
+                               break;
+               }
+
+               /* Make sure nm_rsize is small enough. */
+               maxval = fxdr_unsigned(uint32_t, *tl++);
+               while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
+                       if (nmp->nm_rsize > 8096)
+                               nmp->nm_rsize /= 2;
+                       else
+                               break;
+               }
+
                sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
                tl++;
                sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);

Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c     Sat Jun 17 20:33:11 2017        (r320061)
+++ head/sys/kern/vfs_bio.c     Sat Jun 17 22:24:19 2017        (r320062)
@@ -131,6 +131,7 @@ static void bufkva_reclaim(vmem_t *, int);
 static void bufkva_free(struct buf *);
 static int buf_import(void *, void **, int, int);
 static void buf_release(void *, void **, int);
+static void maxbcachebuf_adjust(void);
 
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
@@ -245,6 +246,9 @@ SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW,
 SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD,
     &unmapped_buf_allowed, 0,
     "Permit the use of the unmapped i/o");
+int maxbcachebuf = MAXBCACHEBUF;
+SYSCTL_INT(_vfs, OID_AUTO, maxbcachebuf, CTLFLAG_RDTUN, &maxbcachebuf, 0,
+    "Maximum size of a buffer cache block");
 
 /*
  * This lock synchronizes access to bd_request.
@@ -847,6 +851,29 @@ bd_wakeup(void)
 }
 
 /*
+ * Adjust the maxbcachebuf tunable (vfs.maxbcachebuf) to a usable value.
+ */
+static void
+maxbcachebuf_adjust(void)
+{
+       int i;
+
+       /*
+        * Round down to a power of 2; floor at MAXBSIZE, cap at MAXPHYS.
+        */
+       i = 2;
+       while (i * 2 <= maxbcachebuf)
+               i *= 2;
+       maxbcachebuf = i;       /* largest power of 2 <= tunable (min 2) */
+       if (maxbcachebuf < MAXBSIZE)
+               maxbcachebuf = MAXBSIZE;
+       if (maxbcachebuf > MAXPHYS)
+               maxbcachebuf = MAXPHYS;
+       if (bootverbose != 0 && maxbcachebuf != MAXBCACHEBUF)
+               printf("maxbcachebuf=%d\n", maxbcachebuf);
+}
+
+/*
  * bd_speedup - speedup the buffer cache flushing code
  */
 void
@@ -893,6 +920,7 @@ kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est)
         */
        physmem_est = physmem_est * (PAGE_SIZE / 1024);
 
+       maxbcachebuf_adjust();
        /*
         * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
         * For the first 64MB of ram nominally allocate sufficient buffers to
@@ -1003,7 +1031,9 @@ bufinit(void)
        struct buf *bp;
        int i;
 
-       CTASSERT(MAXBCACHEBUF >= MAXBSIZE);
+       KASSERT(maxbcachebuf >= MAXBSIZE,
+           ("maxbcachebuf (%d) must be >= MAXBSIZE (%d)\n", maxbcachebuf,
+           MAXBSIZE));
        mtx_init(&bqlocks[QUEUE_DIRTY], "bufq dirty lock", NULL, MTX_DEF);
        mtx_init(&bqlocks[QUEUE_EMPTY], "bufq empty lock", NULL, MTX_DEF);
        for (i = QUEUE_CLEAN; i < QUEUE_CLEAN + CLEAN_QUEUES; i++)
@@ -1050,7 +1080,7 @@ bufinit(void)
         * PAGE_SIZE.
         */
        maxbufspace = (long)nbuf * BKVASIZE;
-       hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBCACHEBUF * 10);
+       hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - maxbcachebuf * 10);
        lobufspace = (hibufspace / 20) * 19; /* 95% */
        bufspacethresh = lobufspace + (hibufspace - lobufspace) / 2;
 
@@ -1062,9 +1092,9 @@ bufinit(void)
         * The lower 1 MiB limit is the historical upper limit for
         * hirunningspace.
         */
-       hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBCACHEBUF),
+       hirunningspace = lmax(lmin(roundup(hibufspace / 64, maxbcachebuf),
            16 * 1024 * 1024), 1024 * 1024);
-       lorunningspace = roundup((hirunningspace * 2) / 3, MAXBCACHEBUF);
+       lorunningspace = roundup((hirunningspace * 2) / 3, maxbcachebuf);
 
        /*
         * Limit the amount of malloc memory since it is wired permanently into
@@ -3484,9 +3514,9 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int 
        KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC,
            ("GB_KVAALLOC only makes sense with GB_UNMAPPED"));
        ASSERT_VOP_LOCKED(vp, "getblk");
-       if (size > MAXBCACHEBUF)
-               panic("getblk: size(%d) > MAXBCACHEBUF(%d)\n", size,
-                   MAXBCACHEBUF);
+       if (size > maxbcachebuf)
+               panic("getblk: size(%d) > maxbcachebuf(%d)\n", size,
+                   maxbcachebuf);
        if (!unmapped_buf_allowed)
                flags &= ~(GB_UNMAPPED | GB_KVAALLOC);
 

Modified: head/sys/sys/param.h
==============================================================================
--- head/sys/sys/param.h        Sat Jun 17 20:33:11 2017        (r320061)
+++ head/sys/sys/param.h        Sat Jun 17 22:24:19 2017        (r320062)
@@ -244,9 +244,7 @@
  *             Filesystems can of course request smaller chunks.  Actual
  *             backing memory uses a chunk size of a page (PAGE_SIZE).
  *             The default value here can be overridden on a per-architecture
- *             basis by defining it in <machine/param.h>.  This should
- *             probably be done to increase its value, when MAXBCACHEBUF is
- *             defined as a larger value in <machine/param.h>.
+ *             basis by defining it in <machine/param.h>.
  *
  *             If you make BKVASIZE too small you risk seriously fragmenting
  *             the buffer KVM map which may slow things down a bit.  If you
@@ -265,6 +263,14 @@
 #define BKVASIZE       16384   /* must be power of 2 */
 #endif
 #define BKVAMASK       (BKVASIZE-1)
+
+/*
+ * This variable is tuned via vfs.maxbcachebuf and is set to the value of
+ * MAXBCACHEBUF by default.
+ */
+#ifdef _KERNEL
+extern int     maxbcachebuf;
+#endif
 
 /*
  * MAXPATHLEN defines the longest permissible path length after expanding
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to