From: NeilBrown <[email protected]>

The F_GETLK fcntl can work with either read access or write access or
both.  It can query F_RDLCK and F_WRLCK locks in either case.

However lockd currently treats F_GETLK similar to F_SETLK in that read
access is required to query an F_RDLCK lock and write access is required
to query a F_WRLCK lock.

This is wrong and can cause problems - e.g.  when qemu accesses a
read-only (e.g. iso) filesystem image over NFS (though why it queries
if it can get a write lock - I don't know.  But it does, and this works
with local filesystems).

So we need TEST requests to be handled differently.  To do this:

- change nlm_do_fopen() to accept O_RDWR as a mode and in that case
  succeed if either a O_RDONLY or O_WRONLY file can be opened.
- change nlm_lookup_file() to accept a mode argument from caller,
  instead of deducing base on lock time, and pass that on to nlm_do_fopen()
- change nlm4svc_retrieve_args() and nlmsvc_retrieve_args() to detect
  TEST requests and pass O_RDWR as a mode to nlm_lookup_file, passing
  the same mode as before for other requests.  Also set
   lock->fl.c.flc_file to whichever file is available for TEST requests.
- change nlmsvc_testlock() to also not calculate the mode, but to use
  whenever was stored in lock->fl.c.flc_file.

This behaviour of lockd - requesting O_WRONLY access to TEST for
exclusive locks - has been present at least since git history began.
However it was hidden until recently because knfsd ignored the access
requested by lockd and required only READ access for all locking
requests (unless the underlying filesystem provided an f_op->open
function which checked access permissions).

The commit mentioned in Fixes: below changed nfsd_permission() to NOT
override the access request for LOCK requests and this exposed the bug
that we are now fixing.

Note that there is another issue that this patch does not address.
The flock(.., LOCK_EX) call is permitted on a read-only file descriptor.
Linux NFS maps this to NLM locking as whole-file byte-range locks.
nfsd will see this as though it were fcntl( F_SETLK (F_WRLCK)) and will
now require write access, which it might not be able to get.
It is not clear if this is a problem in practice, or what the best
solution might be.  So no attempt is made to address it.

Reported-by: Tj <[email protected]>
Link:  https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1128861
Fixes: 4cc9b9f2bf4d ("nfsd: refine and rename NFSD_MAY_LOCK")
Reviewed-by: Jeff Layton <[email protected]>
Signed-off-by: NeilBrown <[email protected]>
---
 fs/lockd/svc4proc.c         | 13 ++++++++++---
 fs/lockd/svclock.c          |  4 +---
 fs/lockd/svcproc.c          | 15 ++++++++++++---
 fs/lockd/svcsubs.c          | 28 +++++++++++++++++++---------
 include/linux/lockd/lockd.h |  2 +-
 5 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4b6f18d97734..75e020a8bfd0 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -26,6 +26,8 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args 
*argp,
        struct nlm_host         *host = NULL;
        struct nlm_file         *file = NULL;
        struct nlm_lock         *lock = &argp->lock;
+       bool                    is_test = (rqstp->rq_proc == NLMPROC_TEST ||
+                                          rqstp->rq_proc == NLMPROC_TEST_MSG);
        __be32                  error = 0;
 
        /* nfsd callbacks must have been installed for this procedure */
@@ -46,15 +48,20 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct 
nlm_args *argp,
        if (filp != NULL) {
                int mode = lock_to_openmode(&lock->fl);
 
+               if (is_test)
+                       mode = O_RDWR;
+
                lock->fl.c.flc_flags = FL_POSIX;
 
-               error = nlm_lookup_file(rqstp, &file, lock);
+               error = nlm_lookup_file(rqstp, &file, lock, mode);
                if (error)
                        goto no_locks;
                *filp = file;
-
                /* Set up the missing parts of the file_lock structure */
-               lock->fl.c.flc_file = file->f_file[mode];
+               if (is_test)
+                       lock->fl.c.flc_file = nlmsvc_file_file(file);
+               else
+                       lock->fl.c.flc_file = file->f_file[mode];
                lock->fl.c.flc_pid = current->tgid;
                lock->fl.fl_start = (loff_t)lock->lock_start;
                lock->fl.fl_end = lock->lock_len ?
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 255a847ca0b6..adfd8c072898 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -614,7 +614,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file 
*file,
                struct nlm_lock *conflock)
 {
        int                     error;
-       int                     mode;
        __be32                  ret;
 
        dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
@@ -632,14 +631,13 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file 
*file,
                goto out;
        }
 
-       mode = lock_to_openmode(&lock->fl);
        locks_init_lock(&conflock->fl);
        /* vfs_test_lock only uses start, end, and owner, but tests flc_file */
        conflock->fl.c.flc_file = lock->fl.c.flc_file;
        conflock->fl.fl_start = lock->fl.fl_start;
        conflock->fl.fl_end = lock->fl.fl_end;
        conflock->fl.c.flc_owner = lock->fl.c.flc_owner;
-       error = vfs_test_lock(file->f_file[mode], &conflock->fl);
+       error = vfs_test_lock(lock->fl.c.flc_file, &conflock->fl);
        if (error) {
                ret = nlm_lck_denied_nolocks;
                goto out;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 5817ef272332..d98e8d684376 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -55,6 +55,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args 
*argp,
        struct nlm_host         *host = NULL;
        struct nlm_file         *file = NULL;
        struct nlm_lock         *lock = &argp->lock;
+       bool                    is_test = (rqstp->rq_proc == NLMPROC_TEST ||
+                                          rqstp->rq_proc == NLMPROC_TEST_MSG);
        int                     mode;
        __be32                  error = 0;
 
@@ -70,15 +72,22 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct 
nlm_args *argp,
 
        /* Obtain file pointer. Not used by FREE_ALL call. */
        if (filp != NULL) {
-               error = cast_status(nlm_lookup_file(rqstp, &file, lock));
+               mode = lock_to_openmode(&lock->fl);
+
+               if (is_test)
+                       mode = O_RDWR;
+
+               error = cast_status(nlm_lookup_file(rqstp, &file, lock, mode));
                if (error != 0)
                        goto no_locks;
                *filp = file;
 
                /* Set up the missing parts of the file_lock structure */
-               mode = lock_to_openmode(&lock->fl);
                lock->fl.c.flc_flags = FL_POSIX;
-               lock->fl.c.flc_file  = file->f_file[mode];
+               if (is_test)
+                       lock->fl.c.flc_file = nlmsvc_file_file(file);
+               else
+                       lock->fl.c.flc_file = file->f_file[mode];
                lock->fl.c.flc_pid = current->tgid;
                lock->fl.fl_lmops = &nlmsvc_lock_operations;
                nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index dd0214dcb695..865ff6844743 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -82,18 +82,30 @@ int lock_to_openmode(struct file_lock *lock)
  *
  * We have to make sure we have the right credential to open
  * the file.
+ *
+ * mode can be O_RDONLY(0), O_WRONLY(1) or O_RDWR(2).
+ * The latter means succecss can be achieved with EITHER O_RDONLY or
+ * O_WRONLY.  It does NOT mean both read and write are required.
  */
 static __be32 nlm_do_fopen(struct svc_rqst *rqstp,
                           struct nlm_file *file, int mode)
 {
-       struct file **fp = &file->f_file[mode];
+       struct file **fp;
        __be32  nfserr;
+       int m;
 
-       if (*fp)
-               return 0;
-       nfserr = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode);
-       if (nfserr)
-               dprintk("lockd: open failed (error %d)\n", nfserr);
+       for (m = O_RDONLY ; m <= O_WRONLY ; m++) {
+               if (mode != O_RDWR && mode != m)
+                       continue;
+
+               fp = &file->f_file[m];
+               if (*fp)
+                       return 0;
+               nfserr = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, m);
+               if (!nfserr)
+                       return 0;
+       }
+       dprintk("lockd: open failed (error %d)\n", nfserr);
        return nfserr;
 }
 
@@ -103,17 +115,15 @@ static __be32 nlm_do_fopen(struct svc_rqst *rqstp,
  */
 __be32
 nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
-                                       struct nlm_lock *lock)
+               struct nlm_lock *lock, int mode)
 {
        struct nlm_file *file;
        unsigned int    hash;
        __be32          nfserr;
-       int             mode;
 
        nlm_debug_print_fh("nlm_lookup_file", &lock->fh);
 
        hash = file_hash(&lock->fh);
-       mode = lock_to_openmode(&lock->fl);
 
        /* Lock file table */
        mutex_lock(&nlm_file_mutex);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 330e38776bb2..fe5cdd4d66f4 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -294,7 +294,7 @@ void                  nlmsvc_locks_init_private(struct 
file_lock *, struct nlm_host *, pid_t);
  * File handling for the server personality
  */
 __be32           nlm_lookup_file(struct svc_rqst *, struct nlm_file **,
-                                       struct nlm_lock *);
+                                 struct nlm_lock *, int);
 void             nlm_release_file(struct nlm_file *);
 void             nlmsvc_put_lockowner(struct nlm_lockowner *);
 void             nlmsvc_release_lockowner(struct nlm_lock *);
-- 
2.50.0.107.gf914562f5916.dirty

Reply via email to