From: Huang Le <[email protected]>

In move_to_close_lru(), which is only called on the nfsd4 CLOSE path,
the code can wait for the stid refcount to drop to 2 while holding the
state owner's replay mutex.  However, the other stid ref holder
(normally a parallel CLOSE op) that move_to_close_lru() is waiting for
may itself be trying to acquire the same replay mutex, so neither task
can make progress.
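
For example, with two nfsd tasks handling CLOSE ops against the same
open owner (a hypothetical interleaving; names as in fs/nfsd/nfs4state.c):

    task A (CLOSE)                       task B (parallel CLOSE)
    --------------                       -----------------------
    takes a ref on the stid              takes a ref on the same stid
    locks so_replay.rp_mutex
    move_to_close_lru():                 blocks locking so_replay.rp_mutex
      wait_event(sc_count == 2)
      never wakes: B's ref keeps         never drops its ref: stuck
      sc_count above 2                   behind A's rp_mutex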

This patch fixes the issue by clearing the replay owner before waiting
and assigning it back afterwards.

Signed-off-by: Huang Le <[email protected]>
---

I guess we should Cc this patch to the stable tree, since a malicious
client could craft parallel CLOSE ops that quickly put every nfsd task
into D state.
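
For reviewers' reference, the replay-owner helpers the fix relies on
look roughly like this in fs/nfsd/nfs4state.c (paraphrased, not part of
this patch; see the tree for the exact bodies):

	static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
			struct nfs4_stateowner *so)
	{
		if (!nfsd4_has_session(cstate)) {
			/* take the replay mutex and pin the owner */
			mutex_lock(&so->so_replay.rp_mutex);
			cstate->replay_owner = nfs4_get_stateowner(so);
		}
	}

	void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
	{
		struct nfs4_stateowner *so = cstate->replay_owner;

		if (so != NULL) {
			/* drop the replay mutex and the owner reference */
			cstate->replay_owner = NULL;
			mutex_unlock(&so->so_replay.rp_mutex);
			nfs4_put_stateowner(so);
		}
	}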

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 618e660..5f6a48f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3829,12 +3829,12 @@ static void nfs4_free_openowner(struct nfs4_stateowner *so)
  * them before returning however.
  */
 static void
-move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
+move_to_close_lru(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *s,
+               struct net *net)
 {
        struct nfs4_ol_stateid *last;
        struct nfs4_openowner *oo = openowner(s->st_stateowner);
-       struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net,
-                                               nfsd_net_id);
+       struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
        dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo);
 
@@ -3846,8 +3846,19 @@ static void nfs4_free_openowner(struct nfs4_stateowner *so)
         * Wait for the refcount to drop to 2. Since it has been unhashed,
         * there should be no danger of the refcount going back up again at
         * this point.
+        *
+        * Before waiting, we clear cstate->replay_owner to release its
+        * so_replay.rp_mutex, since other reference holders might be trying
+        * to acquire the same mutex before they can drop their references.
+        * The replay_owner can safely be assigned back once they are done.
         */
-       wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
+       if (refcount_read(&s->st_stid.sc_count) != 2) {
+               struct nfs4_stateowner *so = cstate->replay_owner;
+
+               nfsd4_cstate_clear_replay(cstate);
+               wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
+               nfsd4_cstate_assign_replay(cstate, so);
+       }
 
        release_all_access(s);
        if (s->st_stid.sc_file) {
@@ -5531,7 +5542,8 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac
        return status;
 }
 
-static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
+static void nfsd4_close_open_stateid(struct nfsd4_compound_state *cstate,
+               struct nfs4_ol_stateid *s)
 {
        struct nfs4_client *clp = s->st_stid.sc_client;
        bool unhashed;
@@ -5549,7 +5561,7 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
                spin_unlock(&clp->cl_lock);
                free_ol_stateid_reaplist(&reaplist);
                if (unhashed)
-                       move_to_close_lru(s, clp->net);
+                       move_to_close_lru(cstate, s, clp->net);
        }
 }
 
@@ -5587,7 +5599,7 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
         */
        nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
 
-       nfsd4_close_open_stateid(stp);
+       nfsd4_close_open_stateid(cstate, stp);
        mutex_unlock(&stp->st_mutex);
 
        /* v4.1+ suggests that we send a special stateid in here, since the
