On Mon, 2020-11-16 at 22:18 -0500, J. Bruce Fields wrote:
> From: "J. Bruce Fields" <[email protected]>
>
> fill_{pre/post}_attr are unconditionally using i_version even when the
> underlying filesystem doesn't have proper support for i_version.
>
> Move the code that chooses which i_version to use to the common
> nfsd4_change_attribute().
>
> The NFSEXP_V4ROOT case probably doesn't matter (the pseudoroot
> filesystem is usually read-only and unlikely to see operations with pre
> and post change attributes), but let's put it in the same place anyway
> for consistency.
>
> Fixes: c654b8a9cba6 ("nfsd: support ext4 i_version")
> Signed-off-by: J. Bruce Fields <[email protected]>
> ---
> fs/nfsd/nfs4xdr.c | 11 +----------
> fs/nfsd/nfsfh.c | 11 +++++++----
> fs/nfsd/nfsfh.h | 23 -----------------------
> fs/nfsd/vfs.c | 32 ++++++++++++++++++++++++++++++++
> fs/nfsd/vfs.h | 3 +++
> 5 files changed, 43 insertions(+), 37 deletions(-)
>
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 833a2c64dfe8..6806207b6d18 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2295,16 +2295,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
> static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode
> *inode,
> struct svc_export *exp)
> {
> - if (exp->ex_flags & NFSEXP_V4ROOT) {
> - *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
> - *p++ = 0;
> - } else if (IS_I_VERSION(inode)) {
> - p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode));
> - } else {
> - *p++ = cpu_to_be32(stat->ctime.tv_sec);
> - *p++ = cpu_to_be32(stat->ctime.tv_nsec);
> - }
> - return p;
> + return xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode, exp));
> }
>
>
>
>
> /*
> diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
> index b3b4e8809aa9..4fbe1413e767 100644
> --- a/fs/nfsd/nfsfh.c
> +++ b/fs/nfsd/nfsfh.c
> @@ -719,6 +719,7 @@ void fill_pre_wcc(struct svc_fh *fhp)
> {
> struct inode *inode;
> struct kstat stat;
> + struct svc_export *exp = fhp->fh_export;
> __be32 err;
>
>
>
>
> if (fhp->fh_pre_saved)
> @@ -736,7 +737,7 @@ void fill_pre_wcc(struct svc_fh *fhp)
> fhp->fh_pre_mtime = stat.mtime;
> fhp->fh_pre_ctime = stat.ctime;
> fhp->fh_pre_size = stat.size;
> - fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
> + fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode, exp);
> fhp->fh_pre_saved = true;
> }
>
>
>
>
> @@ -746,17 +747,19 @@ void fill_pre_wcc(struct svc_fh *fhp)
> void fill_post_wcc(struct svc_fh *fhp)
> {
> __be32 err;
> + struct inode *inode = d_inode(fhp->fh_dentry);
> + struct svc_export *exp = fhp->fh_export;
>
>
>
>
> if (fhp->fh_post_saved)
> printk("nfsd: inode locked twice during operation.\n");
>
>
>
>
> err = fh_getattr(fhp, &fhp->fh_post_attr);
> - fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr,
> - d_inode(fhp->fh_dentry));
> + fhp->fh_post_change =
> + nfsd4_change_attribute(&fhp->fh_post_attr, inode, exp);
> if (err) {
> fhp->fh_post_saved = false;
> /* Grab the ctime anyway - set_change_info might use it */
> - fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime;
> + fhp->fh_post_attr.ctime = inode->i_ctime;
> } else
> fhp->fh_post_saved = true;
> }
> diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
> index 56cfbc361561..547aef9b3265 100644
> --- a/fs/nfsd/nfsfh.h
> +++ b/fs/nfsd/nfsfh.h
> @@ -245,29 +245,6 @@ fh_clear_wcc(struct svc_fh *fhp)
> fhp->fh_pre_saved = false;
> }
>
>
>
>
> -/*
> - * We could use i_version alone as the change attribute. However,
> - * i_version can go backwards after a reboot. On its own that doesn't
> - * necessarily cause a problem, but if i_version goes backwards and then
> - * is incremented again it could reuse a value that was previously used
> - * before boot, and a client who queried the two values might
> - * incorrectly assume nothing changed.
> - *
> - * By using both ctime and the i_version counter we guarantee that as
> - * long as time doesn't go backwards we never reuse an old value.
> - */
> -static inline u64 nfsd4_change_attribute(struct kstat *stat,
> - struct inode *inode)
> -{
> - u64 chattr;
> -
> - chattr = stat->ctime.tv_sec;
> - chattr <<= 30;
> - chattr += stat->ctime.tv_nsec;
> - chattr += inode_query_iversion(inode);
> - return chattr;
> -}
> -
> extern void fill_pre_wcc(struct svc_fh *fhp);
> extern void fill_post_wcc(struct svc_fh *fhp);
> #else
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 1ecaceebee13..2c71b02dd1fe 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -2390,3 +2390,35 @@ nfsd_permission(struct svc_rqst *rqstp, struct
> svc_export *exp,
>
>
>
>
> return err? nfserrno(err) : 0;
> }
> +
> +/*
> + * We could use i_version alone as the change attribute. However,
> + * i_version can go backwards after a reboot. On its own that doesn't
> + * necessarily cause a problem, but if i_version goes backwards and then
> + * is incremented again it could reuse a value that was previously used
> + * before boot, and a client who queried the two values might
> + * incorrectly assume nothing changed.
> + *
> + * By using both ctime and the i_version counter we guarantee that as
> + * long as time doesn't go backwards we never reuse an old value.
> + */
> +u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode,
> + struct svc_export *exp)
> +{
> + u64 chattr;
> +
> + if (exp->ex_flags & NFSEXP_V4ROOT) {
> + chattr = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
> + chattr <<= 32;
> + } else if (IS_I_VERSION(inode)) {
> + chattr = stat->ctime.tv_sec;
> + chattr <<= 30;
> + chattr += stat->ctime.tv_nsec;
> + chattr += inode_query_iversion(inode);
> + } else {
> + chattr = stat->ctime.tv_sec;
> + chattr <<= 32;
> + chattr += stat->ctime.tv_nsec;
> + }
> + return chattr;
> +}
I don't think I described what I was thinking well. Let me try again...
There should be no need to change the code in iversion.h -- I think we
can do this in a way that's confined to just nfsd/export code.
What I would suggest is to have nfsd4_change_attribute() call the
fetch_iversion op if it exists, instead of checking IS_I_VERSION and
combining the ctime with inode_query_iversion() as that branch does now.
If fetch_iversion is NULL, then just use the ctime.
Then, you just need to make sure that the filesystems' export_ops have
an appropriate fetch_iversion op. xfs, ext4 and btrfs can just call
inode_query_iversion, and NFS and Ceph can call inode_peek_iversion_raw.
The rest of the filesystems can leave fetch_iversion as NULL (since we
don't want to use i_version on them).
> diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
> index a2442ebe5acf..26ed15256340 100644
> --- a/fs/nfsd/vfs.h
> +++ b/fs/nfsd/vfs.h
> @@ -132,6 +132,9 @@ __be32 nfsd_statfs(struct svc_rqst *, struct
> svc_fh *,
> __be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
> struct dentry *, int);
>
>
>
>
> +u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode,
> + struct svc_export *exp);
> +
> static inline int fh_want_write(struct svc_fh *fh)
> {
> int ret;
--
Jeff Layton <[email protected]>
--
Linux-cachefs mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/linux-cachefs