In message <[EMAIL PROTECTED]>, Maxim Sobolev writes:
>I found that after introduction of the new RPC, the NFS client is no longer
>able to recover from a server crash (both client and server are 5-CURRENT
>systems). After the well known `nfs server not responding' message, the
>client hangs, and even though the server comes back in a minute or two it
>doesn't recover and just sits in this state forever. All unmount requests
>get stuck in the kernel, as do the processes that are accessing files from
>that mount point. This doesn't look like the right thing and obviously
>should be fixed before 5.0-RELEASE.

I've seen some similar effects, but I don't think it has anything
to do with the new RPC code, as that only runs at mount time. It
would be useful if you could use tcpdump to see if any requests
are being transmitted, and if they are getting responses. Also
try running kgdb on the client to get a kernel backtrace of the
stuck processes.

Is this a UDP or TCP based mount?

If you are feeling brave, you could also try the patch below. It
is a selection of changes to the kernel NFS code that I have built
up over the last few months. I don't think it could solve the hangs,
but it should improve the chance of interruptible mounts accepting
^C while waiting, and (just added the other day) umount -f should
work while the server is down even if processes are hung.

Ian


Index: nfs.h
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs.h,v
retrieving revision 1.59
diff -u -r1.59 nfs.h
--- nfs.h       2001/04/17 20:45:21     1.59
+++ nfs.h       2001/07/20 13:19:51
@@ -633,6 +633,7 @@
                              struct mbuf *));
 int    nfs_adv __P((struct mbuf **, caddr_t *, int, int));
 void   nfs_nhinit __P((void));
+void   nfs_nmcancelreqs __P((struct nfsmount *));
 void   nfs_timer __P((void*));
 int    nfsrv_dorec __P((struct nfssvc_sock *, struct nfsd *, 
                         struct nfsrv_descript **));
Index: nfs_nqlease.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs_nqlease.c,v
retrieving revision 1.59
diff -u -r1.59 nfs_nqlease.c
--- nfs_nqlease.c       2001/05/01 08:13:14     1.59
+++ nfs_nqlease.c       2001/05/01 14:29:22
@@ -952,7 +952,9 @@
 }
 
 /*
- * Called for client side callbacks
+ * Called for client side callbacks.
+ * NB: We are responsible for freeing `mrep' in all cases, but note
+ * that anything that does a 'goto nfsmout' frees it for us.
  */
 int
 nqnfs_callback(nmp, mrep, md, dpos)
@@ -982,8 +984,10 @@
        nfsd->nd_md = md;
        nfsd->nd_dpos = dpos;
        error = nfs_getreq(nfsd, &tnfsd, FALSE);
-       if (error)
+       if (error) {
+               m_freem(mrep);
                return (error);
+       }
        md = nfsd->nd_md;
        dpos = nfsd->nd_dpos;
        if (nfsd->nd_procnum != NQNFSPROC_EVICTED) {
Index: nfs_socket.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs_socket.c,v
retrieving revision 1.66
diff -u -r1.66 nfs_socket.c
--- nfs_socket.c        2001/05/01 08:13:14     1.66
+++ nfs_socket.c        2001/07/20 13:45:01
@@ -144,7 +144,8 @@
  */
 #define        NFS_CWNDSCALE   256
 #define        NFS_MAXCWND     (NFS_CWNDSCALE * 32)
-static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
+#define NFS_NBACKOFF   8
+static int nfs_backoff[NFS_NBACKOFF] = { 2, 4, 8, 16, 32, 64, 128, 256, };
 int nfsrtton = 0;
 struct nfsrtt nfsrtt;
 struct callout_handle  nfs_timer_handle;
@@ -299,11 +300,17 @@
                splx(s);
        }
        if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
-               so->so_rcv.sb_timeo = (5 * hz);
-               so->so_snd.sb_timeo = (5 * hz);
+               so->so_rcv.sb_timeo = (2 * hz);
+               so->so_snd.sb_timeo = (2 * hz);
        } else {
-               so->so_rcv.sb_timeo = 0;
-               so->so_snd.sb_timeo = 0;
+               /*
+                * We would normally set the timeouts to 0 (never time out)
+                * for non-interruptible mounts. However, nfs_nmcancelreqs()
+                * can still prematurely terminate requests, so avoid
+                * waiting forever.
+                */
+               so->so_rcv.sb_timeo = 10 * hz;
+               so->so_snd.sb_timeo = 10 * hz;
        }
 
        /*
@@ -1400,10 +1407,18 @@
        for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
                nmp = rep->r_nmp;
                if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
-                       continue;
-               if (nfs_sigintr(nmp, rep, rep->r_procp)) {
-                       nfs_softterm(rep);
                        continue;
+               /*
+                * Test for signals on interruptible mounts. We try to
+                * maintain normal (uninterruptible) semantics while the
+                * server is up, but respond quickly to signals when it
+                * is down.
+                */
+               if (nmp->nm_timeouts >= NFS_NBACKOFF / 2) {
+                       if (nfs_sigintr(nmp, rep, rep->r_procp)) {
+                               nfs_softterm(rep);
+                               continue;
+                       }
                }
                if (rep->r_rtt >= 0) {
                        rep->r_rtt++;
@@ -1415,7 +1430,7 @@
                                timeo *= nfs_backoff[nmp->nm_timeouts - 1];
                        if (rep->r_rtt <= timeo)
                                continue;
-                       if (nmp->nm_timeouts < 8)
+                       if (nmp->nm_timeouts < NFS_NBACKOFF)
                                nmp->nm_timeouts++;
                }
                /*
@@ -1438,8 +1453,6 @@
                                rep->r_rexmit = NFS_MAXREXMIT;
                        continue;
                }
-               if ((so = nmp->nm_so) == NULL)
-                       continue;
 
                /*
                 * If there is enough space and the window allows..
@@ -1447,6 +1460,8 @@
                 * Set r_rtt to -1 in case we fail to send it now.
                 */
                rep->r_rtt = -1;
+               if ((so = nmp->nm_so) == NULL)
+                       continue;
                if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
                   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
                    (rep->r_flags & R_SENT) ||
@@ -1510,6 +1525,27 @@
 }
 
 /*
+ * Mark all outstanding requests pertaining to a nfs mount with R_SOFTTERM.
+ * This is used by forced unmounts to terminate any outstanding RPCs.
+ */
+void
+nfs_nmcancelreqs(nmp)
+       struct nfsmount *nmp;
+{
+       struct nfsreq *req;
+       int s;
+
+       s = splnet();
+       for (req = nfs_reqq.tqh_first; req != 0; req = req->r_chain.tqe_next) {
+               if (nmp != req->r_nmp || req->r_mrep != NULL ||
+                   (req->r_flags & R_SOFTTERM))
+                       continue;
+               nfs_softterm(req);
+       }
+       splx(s);
+}
+
+/*
  * Flag a request as being about to terminate (due to NFSMNT_INT/NFSMNT_SOFT).
  * The nm_send count is decremented now to avoid deadlocks when the process in
  * soreceive() hasn't yet managed to send its own request.
@@ -1576,7 +1612,7 @@
        } else
                p = (struct proc *)0;
        while (*statep & NFSSTA_SNDLOCK) {
-               if (nfs_sigintr(rep->r_nmp, rep, p))
+               if (rep != NULL && (rep->r_flags & R_SOFTTERM))
                        return (EINTR);
                *statep |= NFSSTA_WANTSND;
                (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
@@ -1620,7 +1656,7 @@
        else
                slpflag = 0;
        while (*statep & NFSSTA_RCVLOCK) {
-               if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
+               if (rep != NULL && (rep->r_flags & R_SOFTTERM))
                        return (EINTR);
                *statep |= NFSSTA_WANTRCV;
                (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1), "nfsrcvlk",
@@ -1638,6 +1674,9 @@
                        slptimeo = 2 * hz;
                }
        }
+       /* Always fail if our request has been cancelled. */
+       if (rep != NULL && (rep->r_flags & R_SOFTTERM))
+               return (EINTR);
        *statep |= NFSSTA_RCVLOCK;
        return (0);
 }
Index: nfs_subs.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs_subs.c,v
retrieving revision 1.103
diff -u -r1.103 nfs_subs.c
--- nfs_subs.c  2001/07/04 16:20:16     1.103
+++ nfs_subs.c  2001/07/10 21:46:16
@@ -1120,7 +1120,7 @@
        nfs_true = txdr_unsigned(TRUE);
        nfs_false = txdr_unsigned(FALSE);
        nfs_xdrneg1 = txdr_unsigned(-1);
-       nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
+       nfs_ticks = (hz * NFS_TICKINTVL + 999) / 1000;
        if (nfs_ticks < 1)
                nfs_ticks = 1;
        /* Ensure async daemons disabled */
Index: nfs_vfsops.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs_vfsops.c,v
retrieving revision 1.100
diff -u -r1.100 nfs_vfsops.c
--- nfs_vfsops.c        2001/06/28 04:10:07     1.100
+++ nfs_vfsops.c        2001/07/20 13:47:21
@@ -624,7 +624,7 @@
        splx(s);
 
        if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
-               nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
+               nmp->nm_timeo = (argp->timeo * NFS_HZ + 9) / 10;
                if (nmp->nm_timeo < NFS_MINTIMEO)
                        nmp->nm_timeo = NFS_MINTIMEO;
                else if (nmp->nm_timeo > NFS_MAXTIMEO)
@@ -970,6 +970,10 @@
        nmp->nm_state |= NFSSTA_DISMINPROG;
        while (nmp->nm_inprog != NULLVP)
                (void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0);
+
+       /* In the forced case, cancel any outstanding requests. */
+       if (flags & FORCECLOSE)
+               nfs_nmcancelreqs(nmp);
 
        /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
        error = vflush(mp, 1, flags);

To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message

Reply via email to