[PATCH 3.19.y-ckt 037/164] nfsd: serialize state seqid morphing operations
3.19.8-ckt11 -stable review patch. If anyone has any objections, please let me know. -- From: Jeff Layton commit 35a92fe8770ce54c5eb275cd76128645bea2d200 upstream. Andrew was seeing a race occur when an OPEN and OPEN_DOWNGRADE were running in parallel. The server would receive the OPEN_DOWNGRADE first and check its seqid, but then an OPEN would race in and bump it. The OPEN_DOWNGRADE would then complete and bump the seqid again. The result was that the OPEN_DOWNGRADE would be applied after the OPEN, even though it should have been rejected since the seqid changed. The only recourse we have here I think is to serialize operations that bump the seqid in a stateid, particularly when we're given a seqid in the call. To address this, we add a new rw_semaphore to the nfs4_ol_stateid struct. We do a down_write prior to checking the seqid after looking up the stateid to ensure that nothing else is going to bump it while we're operating on it. In the case of OPEN, we do a down_read, as the call doesn't contain a seqid. Those can run in parallel -- we just need to serialize them when there is a concurrent OPEN_DOWNGRADE or CLOSE. LOCK and LOCKU however always take the write lock as there is no opportunity for parallelizing those. Reported-and-Tested-by: Andrew W Elble Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields [ kamal: backport to 3.19-stable: context ] Signed-off-by: Kamal Mostafa --- fs/nfsd/nfs4state.c | 33 - fs/nfsd/state.h | 17 + 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8bb73f8..2bf99cd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3262,6 +3262,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; + init_rwsem(>st_rwsem); spin_lock(>oo_owner.so_client->cl_lock); list_add(>st_perstateowner, >oo_owner.so_stateids); spin_lock(>fi_lock); @@ -4083,21 +4084,27 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ + down_read(>st_rwsem); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); - if (status) + if (status) { + up_read(>st_rwsem); goto out; + } } else { stp = open->op_stp; open->op_stp = NULL; init_open_stateid(stp, fp, open); + down_read(>st_rwsem); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { + up_read(>st_rwsem); release_open_stateid(stp); goto out; } } update_stateid(>st_stid.sc_stateid); memcpy(>op_stateid, >st_stid.sc_stateid, sizeof(stateid_t)); + up_read(>st_rwsem); if (nfsd4_has_session(>cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4665,10 +4672,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; + down_write(>st_rwsem); status = check_stateid_generation(stateid, >st_stid.sc_stateid, nfsd4_has_session(cstate)); - if (status) - return status; - return nfs4_check_fh(current_fh, stp); + if (status == nfs_ok) + status = nfs4_check_fh(current_fh, stp); + if (status != nfs_ok) + up_write(>st_rwsem); + return status; } /* @@ -4715,6 +4725,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + up_write(>st_rwsem); nfs4_put_stid(>st_stid); return nfserr_bad_stateid; } @@ -4745,11 +4756,14 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; - if (oo->oo_flags & NFS4_OO_CONFIRMED) + if (oo->oo_flags & NFS4_OO_CONFIRMED) { + up_write(>st_rwsem); goto put_stateid; + } oo->oo_flags |= NFS4_OO_CONFIRMED; update_stateid(>st_stid.sc_stateid); memcpy(>oc_resp_stateid, >st_stid.sc_stateid, sizeof(stateid_t)); + up_write(>st_rwsem); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid,
[PATCH 3.19.y-ckt 037/164] nfsd: serialize state seqid morphing operations
3.19.8-ckt11 -stable review patch. If anyone has any objections, please let me know. -- From: Jeff Laytoncommit 35a92fe8770ce54c5eb275cd76128645bea2d200 upstream. Andrew was seeing a race occur when an OPEN and OPEN_DOWNGRADE were running in parallel. The server would receive the OPEN_DOWNGRADE first and check its seqid, but then an OPEN would race in and bump it. The OPEN_DOWNGRADE would then complete and bump the seqid again. The result was that the OPEN_DOWNGRADE would be applied after the OPEN, even though it should have been rejected since the seqid changed. The only recourse we have here I think is to serialize operations that bump the seqid in a stateid, particularly when we're given a seqid in the call. To address this, we add a new rw_semaphore to the nfs4_ol_stateid struct. We do a down_write prior to checking the seqid after looking up the stateid to ensure that nothing else is going to bump it while we're operating on it. In the case of OPEN, we do a down_read, as the call doesn't contain a seqid. Those can run in parallel -- we just need to serialize them when there is a concurrent OPEN_DOWNGRADE or CLOSE. LOCK and LOCKU however always take the write lock as there is no opportunity for parallelizing those. Reported-and-Tested-by: Andrew W Elble Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields [ kamal: backport to 3.19-stable: context ] Signed-off-by: Kamal Mostafa --- fs/nfsd/nfs4state.c | 33 - fs/nfsd/state.h | 17 + 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8bb73f8..2bf99cd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3262,6 +3262,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; + init_rwsem(>st_rwsem); spin_lock(>oo_owner.so_client->cl_lock); list_add(>st_perstateowner, >oo_owner.so_stateids); spin_lock(>fi_lock); @@ -4083,21 +4084,27 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ + down_read(>st_rwsem); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); - if (status) + if (status) { + up_read(>st_rwsem); goto out; + } } else { stp = open->op_stp; open->op_stp = NULL; init_open_stateid(stp, fp, open); + down_read(>st_rwsem); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { + up_read(>st_rwsem); release_open_stateid(stp); goto out; } } update_stateid(>st_stid.sc_stateid); memcpy(>op_stateid, >st_stid.sc_stateid, sizeof(stateid_t)); + up_read(>st_rwsem); if (nfsd4_has_session(>cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4665,10 +4672,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; + down_write(>st_rwsem); status = check_stateid_generation(stateid, >st_stid.sc_stateid, nfsd4_has_session(cstate)); - if (status) - return status; - return nfs4_check_fh(current_fh, stp); + if (status == nfs_ok) + status = nfs4_check_fh(current_fh, stp); + if (status != nfs_ok) + up_write(>st_rwsem); + return status; } /* @@ -4715,6 +4725,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + up_write(>st_rwsem); nfs4_put_stid(>st_stid); return nfserr_bad_stateid; } @@ -4745,11 +4756,14 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; - if (oo->oo_flags & NFS4_OO_CONFIRMED) + if (oo->oo_flags & NFS4_OO_CONFIRMED) { + up_write(>st_rwsem); goto put_stateid; + } oo->oo_flags |= NFS4_OO_CONFIRMED; update_stateid(>st_stid.sc_stateid); memcpy(>oc_resp_stateid, >st_stid.sc_stateid, sizeof(stateid_t)); + up_write(>st_rwsem);