Hello, while working on nfs performance issues with overquota writes (which turned out to be a ffs issue), I came up with the attached patch. What this does it, for nfs over TCP, restrict a socket buffer processing to a single thread (right now, all pending requests are processed by all threads in parallel). This has two advantages: - if a single client sends lots of request (like writes comming from a linux client), a single thread is busy and other threads will be available to serve other client's requests quickly - by avoiding CPU cache sharing and lock contention at the vnode level (if all requests are for the same vnode, which is the common case), we get sighly better performances.
My testbed is a linux box with 2 Opteron 2431 (12 core total) and 32GB RAM writing over gigabit ethernet to a NetBSD server (dual Intel(R) Xeon(TM) CPU 3.00GHz, 4 hyperthread cores total) running nfsd -tun4. Without the patch, the server processes about 1230 writes per second, with this patch it processes about 1250 writes/s. Comments ? -- Manuel Bouyer <bou...@antioche.eu.org> NetBSD: 26 ans d'experience feront toujours la difference --
Index: nfs.h =================================================================== RCS file: /cvsroot/src/sys/nfs/nfs.h,v retrieving revision 1.72 diff -u -p -r1.72 nfs.h --- nfs.h 2 Mar 2010 23:19:09 -0000 1.72 +++ nfs.h 22 Nov 2012 11:49:55 -0000 @@ -476,7 +476,8 @@ struct nfssvc_sock { #define SLP_A_DISCONN 0x04 /* Bits for "ns_gflags" */ -#define SLP_G_DOREC 0x02 /* on nfssvc_sockpending queue */ +#define SLP_G_MOREREQ 0x01 /* the thread should keep processing this slp */ +#define SLP_G_INQUEUE 0x02 /* on nfssvc_sockpending queue */ /* Bits for "ns_sflags" */ #define SLP_S_LASTFRAG 0x40 Index: nfs_srvsocket.c =================================================================== RCS file: /cvsroot/src/sys/nfs/nfs_srvsocket.c,v retrieving revision 1.4 diff -u -p -r1.4 nfs_srvsocket.c --- nfs_srvsocket.c 3 Sep 2009 20:59:12 -0000 1.4 +++ nfs_srvsocket.c 22 Nov 2012 11:49:55 -0000 @@ -447,10 +447,21 @@ nfsrv_wakenfsd_locked(struct nfssvc_sock if ((slp->ns_flags & SLP_VALID) == 0) return; - if (slp->ns_gflags & SLP_G_DOREC) + if (slp->ns_gflags & (SLP_G_MOREREQ|SLP_G_INQUEUE)) return; + if (slp->ns_sref > 0 && slp->ns_so->so_type == SOCK_STREAM) { + /* + * a nfsd thread is already dealing with this socket, + * don't split it accross multiple threads + */ + slp->ns_gflags |= SLP_G_MOREREQ; + return; + } + nd = SLIST_FIRST(&nfsd_idle_head); if (nd) { + KASSERT((nfsd_head_flag & NFSD_CHECKSLP) == 0); + KASSERT(TAILQ_FIRST(&nfssvc_sockpending) == NULL); SLIST_REMOVE_HEAD(&nfsd_idle_head, nfsd_idle); if (nd->nfsd_slp) panic("nfsd wakeup"); @@ -459,7 +470,7 @@ nfsrv_wakenfsd_locked(struct nfssvc_sock nd->nfsd_slp = slp; cv_signal(&nd->nfsd_cv); } else { - slp->ns_gflags |= SLP_G_DOREC; + slp->ns_gflags |= SLP_G_INQUEUE; nfsd_head_flag |= NFSD_CHECKSLP; TAILQ_INSERT_TAIL(&nfssvc_sockpending, slp, ns_pending); } Index: nfs_syscalls.c =================================================================== RCS file: /cvsroot/src/sys/nfs/nfs_syscalls.c,v retrieving revision 1.153 diff -u -p -r1.153 nfs_syscalls.c --- nfs_syscalls.c 31 Dec 2009 20:01:33 -0000 1.153 +++ nfs_syscalls.c 22 Nov 2012 11:49:55 -0000 @@ -99,6 +99,7 @@ struct nfssvc_sock *nfs_udp6sock; static struct nfssvc_sock *nfsrv_sockalloc(void); static void nfsrv_sockfree(struct nfssvc_sock *); static void nfsd_rt(int, struct nfsrv_descript *, int); +static void nfsrv_slpderef_locked(struct nfssvc_sock *); /* * NFS server system calls @@ -451,86 +452,89 @@ nfssvc_nfsd(struct nfsd_srvargs *nsd, vo * Loop getting rpc requests until SIGKILL. */ for (;;) { - bool dummy; - if ((curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) != 0) { preempt(); } - if (nfsd->nfsd_slp == NULL) { - mutex_enter(&nfsd_lock); - while (nfsd->nfsd_slp == NULL && - (nfsd_head_flag & NFSD_CHECKSLP) == 0) { - SLIST_INSERT_HEAD(&nfsd_idle_head, nfsd, - nfsd_idle); - error = cv_wait_sig(&nfsd->nfsd_cv, &nfsd_lock); - if (error) { - slp = nfsd->nfsd_slp; - nfsd->nfsd_slp = NULL; - if (!slp) - SLIST_REMOVE(&nfsd_idle_head, - nfsd, nfsd, nfsd_idle); - mutex_exit(&nfsd_lock); - if (slp) { - nfsrv_wakenfsd(slp); - nfsrv_slpderef(slp); - } - goto done; - } + mutex_enter(&nfsd_lock); + if (nfsd->nfsd_slp != NULL) { + slp = nfsd->nfsd_slp; + if ((slp->ns_gflags & SLP_G_MOREREQ) == 0) { + nfsd->nfsd_slp = NULL; + nfsrv_slpderef_locked(slp); + mutex_enter(&nfsd_lock); + } else { + slp->ns_gflags &= ~SLP_G_MOREREQ; } - if (nfsd->nfsd_slp == NULL && - (nfsd_head_flag & NFSD_CHECKSLP) != 0) { - slp = TAILQ_FIRST(&nfssvc_sockpending); + } + while (nfsd->nfsd_slp == NULL && + (nfsd_head_flag & NFSD_CHECKSLP) == 0) { + SLIST_INSERT_HEAD(&nfsd_idle_head, nfsd, + nfsd_idle); + error = cv_wait_sig(&nfsd->nfsd_cv, &nfsd_lock); + if (error) { + slp = nfsd->nfsd_slp; + nfsd->nfsd_slp = NULL; + if (!slp) + SLIST_REMOVE(&nfsd_idle_head, + nfsd, nfsd, nfsd_idle); + mutex_exit(&nfsd_lock); if (slp) { - KASSERT((slp->ns_gflags & SLP_G_DOREC) - != 0); - TAILQ_REMOVE(&nfssvc_sockpending, slp, - ns_pending); - slp->ns_gflags &= ~SLP_G_DOREC; - slp->ns_sref++; - nfsd->nfsd_slp = slp; - } else - nfsd_head_flag &= ~NFSD_CHECKSLP; + nfsrv_wakenfsd(slp); + nfsrv_slpderef(slp); + } + goto done; } - KASSERT(nfsd->nfsd_slp == NULL || - nfsd->nfsd_slp->ns_sref > 0); - mutex_exit(&nfsd_lock); - if ((slp = nfsd->nfsd_slp) == NULL) - continue; - if (slp->ns_flags & SLP_VALID) { - bool more; + } + if (nfsd->nfsd_slp == NULL && + (nfsd_head_flag & NFSD_CHECKSLP) != 0) { + slp = TAILQ_FIRST(&nfssvc_sockpending); + if (slp) { + KASSERT((slp->ns_gflags & SLP_G_INQUEUE) + != 0); + TAILQ_REMOVE(&nfssvc_sockpending, slp, + ns_pending); + slp->ns_gflags &= ~SLP_G_INQUEUE; + slp->ns_sref++; + nfsd->nfsd_slp = slp; + } else + nfsd_head_flag &= ~NFSD_CHECKSLP; + } + KASSERT(nfsd->nfsd_slp == NULL || + nfsd->nfsd_slp->ns_sref > 0); + mutex_exit(&nfsd_lock); + if ((slp = nfsd->nfsd_slp) == NULL) + continue; + if (slp->ns_flags & SLP_VALID) { + bool more; - if (nfsdsock_testbits(slp, SLP_A_NEEDQ)) { - nfsrv_rcv(slp); - } - if (nfsdsock_testbits(slp, SLP_A_DISCONN)) { - nfsrv_zapsock(slp); - } - error = nfsrv_dorec(slp, nfsd, &nd, &more); - getmicrotime(&tv); - cur_usec = (u_quad_t)tv.tv_sec * 1000000 + - (u_quad_t)tv.tv_usec; - writes_todo = 0; - if (error) { - struct nfsrv_descript *nd2; + if (nfsdsock_testbits(slp, SLP_A_NEEDQ)) { + nfsrv_rcv(slp); + } + if (nfsdsock_testbits(slp, SLP_A_DISCONN)) { + nfsrv_zapsock(slp); + } + error = nfsrv_dorec(slp, nfsd, &nd, &more); + getmicrotime(&tv); + cur_usec = (u_quad_t)tv.tv_sec * 1000000 + + (u_quad_t)tv.tv_usec; + writes_todo = 0; + if (error) { + struct nfsrv_descript *nd2; - mutex_enter(&nfsd_lock); - nd2 = LIST_FIRST(&slp->ns_tq); - if (nd2 != NULL && - nd2->nd_time <= cur_usec) { - error = 0; - cacherep = RC_DOIT; - writes_todo = 1; - } - mutex_exit(&nfsd_lock); - } - if (error == 0 && more) { - nfsrv_wakenfsd(slp); + mutex_enter(&nfsd_lock); + nd2 = LIST_FIRST(&slp->ns_tq); + if (nd2 != NULL && + nd2->nd_time <= cur_usec) { + error = 0; + cacherep = RC_DOIT; + writes_todo = 1; } + mutex_exit(&nfsd_lock); + } + if (error == 0 && more) { + nfsrv_wakenfsd(slp); } - } else { - error = 0; - slp = nfsd->nfsd_slp; } KASSERT(slp != NULL); KASSERT(nfsd->nfsd_slp == slp); @@ -539,8 +543,10 @@ nfssvc_nfsd(struct nfsd_srvargs *nsd, vo nfsdreq_free(nd); nd = NULL; } - nfsd->nfsd_slp = NULL; - nfsrv_slpderef(slp); + if ((slp->ns_flags & SLP_VALID) == 0) { + nfsd->nfsd_slp = NULL; + nfsrv_slpderef(slp); + } continue; } sotype = slp->ns_so->so_type; @@ -683,10 +689,6 @@ nfssvc_nfsd(struct nfsd_srvargs *nsd, vo writes_todo = 0; mutex_exit(&nfsd_lock); } while (writes_todo); - if (nfsrv_dorec(slp, nfsd, &nd, &dummy)) { - nfsd->nfsd_slp = NULL; - nfsrv_slpderef(slp); - } } done: mutex_enter(&nfsd_lock); @@ -724,9 +726,9 @@ nfsrv_zapsock(struct nfssvc_sock *slp) return; } mutex_enter(&nfsd_lock); - if (slp->ns_gflags & SLP_G_DOREC) { + if (slp->ns_gflags & SLP_G_INQUEUE) { TAILQ_REMOVE(&nfssvc_sockpending, slp, ns_pending); - slp->ns_gflags &= ~SLP_G_DOREC; + slp->ns_gflags &= ~SLP_G_INQUEUE; } mutex_exit(&nfsd_lock); @@ -775,15 +777,22 @@ nfsrv_zapsock(struct nfssvc_sock *slp) void nfsrv_slpderef(struct nfssvc_sock *slp) { + mutex_enter(&nfsd_lock); + nfsrv_slpderef_locked(slp); +} + +static void +nfsrv_slpderef_locked(struct nfssvc_sock *slp) +{ uint32_t ref; - mutex_enter(&nfsd_lock); KASSERT(slp->ns_sref > 0); + KASSERT(mutex_owned(&nfsd_lock)); ref = --slp->ns_sref; if (ref == 0 && (slp->ns_flags & SLP_VALID) == 0) { file_t *fp; - KASSERT((slp->ns_gflags & SLP_G_DOREC) == 0); + KASSERT((slp->ns_gflags & SLP_G_INQUEUE) == 0); TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); mutex_exit(&nfsd_lock);