Author: rmacklem Date: Sat Jun 17 22:24:19 2017 New Revision: 320062 URL: https://svnweb.freebsd.org/changeset/base/320062
Log: Make MAXBCACHEBUF a tunable called vfs.maxbcachebuf. By making MAXBCACHEBUF a tunable, it can be increased to allow for larger read/write data sizes for the NFS client. The tunable is limited to MAXPHYS, which is currently 128K. Making MAXPHYS a tunable or increasing its value is being discussed, since it would be nice to support a read/write data size of 1Mbyte for the NFS client when mounting the AmazonEFS file service. Reviewed by: kib MFC after: 2 weeks Relnotes: yes Differential Revision: https://reviews.freebsd.org/D10991 Modified: head/sys/fs/nfs/nfs_commonkrpc.c head/sys/fs/nfs/nfsport.h head/sys/fs/nfsclient/nfs_clrpcops.c head/sys/kern/vfs_bio.c head/sys/sys/param.h Modified: head/sys/fs/nfs/nfs_commonkrpc.c ============================================================================== --- head/sys/fs/nfs/nfs_commonkrpc.c Sat Jun 17 20:33:11 2017 (r320061) +++ head/sys/fs/nfs/nfs_commonkrpc.c Sat Jun 17 22:24:19 2017 (r320062) @@ -161,7 +161,7 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq struct ucred *cred, NFSPROC_T *p, int callback_retry_mult) { int rcvreserve, sndreserve; - int pktscale; + int pktscale, pktscalesav; struct sockaddr *saddr; struct ucred *origcred; CLIENT *client; @@ -210,6 +210,7 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq pktscale = 2; if (pktscale > 64) pktscale = 64; + pktscalesav = pktscale; /* * soreserve() can fail if sb_max is too small, so shrink pktscale * and try again if there is an error. @@ -228,8 +229,12 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq goto out; } do { - if (error != 0 && pktscale > 2) + if (error != 0 && pktscale > 2) { + if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM && + pktscale == pktscalesav) + printf("Consider increasing kern.ipc.maxsockbuf\n"); pktscale--; + } if (nrp->nr_sotype == SOCK_DGRAM) { if (nmp != NULL) { sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * @@ -243,15 +248,19 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq if (nrp->nr_sotype != SOCK_STREAM) panic("nfscon sotype"); if (nmp != NULL) { - sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + + sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR + sizeof (u_int32_t)) * pktscale; - rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + + rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR + sizeof (u_int32_t)) * pktscale; } else { sndreserve = rcvreserve = 1024 * pktscale; } } error = soreserve(so, sndreserve, rcvreserve); + if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM && + pktscale <= 2) + printf("Must increase kern.ipc.maxsockbuf or reduce" + " rsize, wsize\n"); } while (error != 0 && pktscale > 2); soclose(so); if (error) { Modified: head/sys/fs/nfs/nfsport.h ============================================================================== --- head/sys/fs/nfs/nfsport.h Sat Jun 17 20:33:11 2017 (r320061) +++ head/sys/fs/nfs/nfsport.h Sat Jun 17 22:24:19 2017 (r320062) @@ -1016,7 +1016,7 @@ struct nfsreq { }; #ifndef NFS_MAXBSIZE -#define NFS_MAXBSIZE MAXBCACHEBUF +#define NFS_MAXBSIZE (maxbcachebuf) #endif /* Modified: head/sys/fs/nfsclient/nfs_clrpcops.c ============================================================================== --- head/sys/fs/nfsclient/nfs_clrpcops.c Sat Jun 17 20:33:11 2017 (r320061) +++ head/sys/fs/nfsclient/nfs_clrpcops.c Sat Jun 17 22:24:19 2017 (r320062) @@ -4625,7 +4625,7 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsc struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred, NFSPROC_T *p) { - uint32_t crflags, *tl; + uint32_t crflags, maxval, *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error, irdcnt; @@ -4643,8 +4643,8 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsc /* Fill in fore channel attributes. */ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ - *tl++ = txdr_unsigned(100000); /* Max request size */ - *tl++ = txdr_unsigned(100000); /* Max response size */ + *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */ + *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */ *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(20); /* Max operations */ *tl++ = txdr_unsigned(64); /* Max slots */ @@ -4691,7 +4691,26 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsc /* Get the fore channel slot count. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); - tl += 3; /* Skip the other counts. */ + tl++; /* Skip the header pad size. */ + + /* Make sure nm_wsize is small enough. */ + maxval = fxdr_unsigned(uint32_t, *tl++); + while (maxval < nmp->nm_wsize + NFS_MAXXDR) { + if (nmp->nm_wsize > 8096) + nmp->nm_wsize /= 2; + else + break; + } + + /* Make sure nm_rsize is small enough. */ + maxval = fxdr_unsigned(uint32_t, *tl++); + while (maxval < nmp->nm_rsize + NFS_MAXXDR) { + if (nmp->nm_rsize > 8096) + nmp->nm_rsize /= 2; + else + break; + } + sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); tl++; sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++); Modified: head/sys/kern/vfs_bio.c ============================================================================== --- head/sys/kern/vfs_bio.c Sat Jun 17 20:33:11 2017 (r320061) +++ head/sys/kern/vfs_bio.c Sat Jun 17 22:24:19 2017 (r320062) @@ -131,6 +131,7 @@ static void bufkva_reclaim(vmem_t *, int); static void bufkva_free(struct buf *); static int buf_import(void *, void **, int, int); static void buf_release(void *, void **, int); +static void maxbcachebuf_adjust(void); #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) @@ -245,6 +246,9 @@ SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW, SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD, &unmapped_buf_allowed, 0, "Permit the use of the unmapped i/o"); +int maxbcachebuf = MAXBCACHEBUF; +SYSCTL_INT(_vfs, OID_AUTO, maxbcachebuf, CTLFLAG_RDTUN, &maxbcachebuf, 0, + "Maximum size of a buffer cache block"); /* * This lock synchronizes access to bd_request. @@ -847,6 +851,29 @@ bd_wakeup(void) } /* + * Adjust the maxbcachbuf tunable. + */ +static void +maxbcachebuf_adjust(void) +{ + int i; + + /* + * maxbcachebuf must be a power of 2 >= MAXBSIZE. + */ + i = 2; + while (i * 2 <= maxbcachebuf) + i *= 2; + maxbcachebuf = i; + if (maxbcachebuf < MAXBSIZE) + maxbcachebuf = MAXBSIZE; + if (maxbcachebuf > MAXPHYS) + maxbcachebuf = MAXPHYS; + if (bootverbose != 0 && maxbcachebuf != MAXBCACHEBUF) + printf("maxbcachebuf=%d\n", maxbcachebuf); +} + +/* * bd_speedup - speedup the buffer cache flushing code */ void @@ -893,6 +920,7 @@ kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est) */ physmem_est = physmem_est * (PAGE_SIZE / 1024); + maxbcachebuf_adjust(); /* * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to @@ -1003,7 +1031,9 @@ bufinit(void) struct buf *bp; int i; - CTASSERT(MAXBCACHEBUF >= MAXBSIZE); + KASSERT(maxbcachebuf >= MAXBSIZE, + ("maxbcachebuf (%d) must be >= MAXBSIZE (%d)\n", maxbcachebuf, + MAXBSIZE)); mtx_init(&bqlocks[QUEUE_DIRTY], "bufq dirty lock", NULL, MTX_DEF); mtx_init(&bqlocks[QUEUE_EMPTY], "bufq empty lock", NULL, MTX_DEF); for (i = QUEUE_CLEAN; i < QUEUE_CLEAN + CLEAN_QUEUES; i++) @@ -1050,7 +1080,7 @@ bufinit(void) * PAGE_SIZE. */ maxbufspace = (long)nbuf * BKVASIZE; - hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBCACHEBUF * 10); + hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - maxbcachebuf * 10); lobufspace = (hibufspace / 20) * 19; /* 95% */ bufspacethresh = lobufspace + (hibufspace - lobufspace) / 2; @@ -1062,9 +1092,9 @@ bufinit(void) * The lower 1 MiB limit is the historical upper limit for * hirunningspace. */ - hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBCACHEBUF), + hirunningspace = lmax(lmin(roundup(hibufspace / 64, maxbcachebuf), 16 * 1024 * 1024), 1024 * 1024); - lorunningspace = roundup((hirunningspace * 2) / 3, MAXBCACHEBUF); + lorunningspace = roundup((hirunningspace * 2) / 3, maxbcachebuf); /* * Limit the amount of malloc memory since it is wired permanently into @@ -3484,9 +3514,9 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); ASSERT_VOP_LOCKED(vp, "getblk"); - if (size > MAXBCACHEBUF) - panic("getblk: size(%d) > MAXBCACHEBUF(%d)\n", size, - MAXBCACHEBUF); + if (size > maxbcachebuf) + panic("getblk: size(%d) > maxbcachebuf(%d)\n", size, + maxbcachebuf); if (!unmapped_buf_allowed) flags &= ~(GB_UNMAPPED | GB_KVAALLOC); Modified: head/sys/sys/param.h ============================================================================== --- head/sys/sys/param.h Sat Jun 17 20:33:11 2017 (r320061) +++ head/sys/sys/param.h Sat Jun 17 22:24:19 2017 (r320062) @@ -244,9 +244,7 @@ * Filesystems can of course request smaller chunks. Actual * backing memory uses a chunk size of a page (PAGE_SIZE). * The default value here can be overridden on a per-architecture - * basis by defining it in <machine/param.h>. This should - * probably be done to increase its value, when MAXBCACHEBUF is - * defined as a larger value in <machine/param.h>. + * basis by defining it in <machine/param.h>. * * If you make BKVASIZE too small you risk seriously fragmenting * the buffer KVM map which may slow things down a bit. If you @@ -265,6 +263,14 @@ #define BKVASIZE 16384 /* must be power of 2 */ #endif #define BKVAMASK (BKVASIZE-1) + +/* + * This variable is tuned via vfs.maxbcachebuf and is set to the value of + * MAXBCACHEBUF by default. + */ +#ifdef _KERNEL +extern int maxbcachebuf; +#endif /* * MAXPATHLEN defines the longest permissible path length after expanding _______________________________________________ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"