On 2020/08/11 4:29, Andrea Arcangeli wrote:
> However once the mutex is killable there's no concern anymore and the
> hangcheck timer is correct also not reporting any misbehavior anymore.

Do you mean something like below untested patch? I think that the difficult
part is that mutex for close() operation can't become killable. And I worry
that syzbot soon reports a hung task at pipe_release() instead of pipe_read()
or pipe_write(). If pagefault with locks held can be avoided, there will be no
such worry.

 fs/pipe.c                 | 106 ++++++++++++++++++++++++++++++++++++++--------
 fs/splice.c               |  41 ++++++++++++------
 include/linux/pipe_fs_i.h |   5 ++-
 3 files changed, 120 insertions(+), 32 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 60dbee4..537d1ef 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -66,6 +66,13 @@ static void pipe_lock_nested(struct pipe_inode_info *pipe, 
int subclass)
                mutex_lock_nested(&pipe->mutex, subclass);
 }
 
+static int __must_check pipe_lock_killable_nested(struct pipe_inode_info 
*pipe, int subclass)
+{
+       if (pipe->files)
+               return mutex_lock_killable_nested(&pipe->mutex, subclass);
+       return 0;
+}
+
 void pipe_lock(struct pipe_inode_info *pipe)
 {
        /*
@@ -75,6 +82,14 @@ void pipe_lock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_lock);
 
+int pipe_lock_killable(struct pipe_inode_info *pipe)
+{
+       /*
+        * pipe_lock() nests non-pipe inode locks (for writing to a file)
+        */
+       return pipe_lock_killable_nested(pipe, I_MUTEX_PARENT);
+}
+
 void pipe_unlock(struct pipe_inode_info *pipe)
 {
        if (pipe->files)
@@ -87,23 +102,37 @@ static inline void __pipe_lock(struct pipe_inode_info 
*pipe)
        mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
 }
 
+static inline int __must_check __pipe_lock_killable(struct pipe_inode_info 
*pipe)
+{
+       return mutex_lock_killable_nested(&pipe->mutex, I_MUTEX_PARENT);
+}
+
 static inline void __pipe_unlock(struct pipe_inode_info *pipe)
 {
        mutex_unlock(&pipe->mutex);
 }
 
-void pipe_double_lock(struct pipe_inode_info *pipe1,
-                     struct pipe_inode_info *pipe2)
+int pipe_double_lock_killable(struct pipe_inode_info *pipe1,
+                             struct pipe_inode_info *pipe2)
 {
        BUG_ON(pipe1 == pipe2);
 
        if (pipe1 < pipe2) {
-               pipe_lock_nested(pipe1, I_MUTEX_PARENT);
-               pipe_lock_nested(pipe2, I_MUTEX_CHILD);
+               if (pipe_lock_killable_nested(pipe1, I_MUTEX_PARENT))
+                       return -ERESTARTSYS;
+               if (pipe_lock_killable_nested(pipe2, I_MUTEX_CHILD)) {
+                       pipe_unlock(pipe1);
+                       return -ERESTARTSYS;
+               }
        } else {
-               pipe_lock_nested(pipe2, I_MUTEX_PARENT);
-               pipe_lock_nested(pipe1, I_MUTEX_CHILD);
+               if (pipe_lock_killable_nested(pipe2, I_MUTEX_PARENT))
+                       return -ERESTARTSYS;
+               if (pipe_lock_killable_nested(pipe1, I_MUTEX_CHILD)) {
+                       pipe_unlock(pipe2);
+                       return -ERESTARTSYS;
+               }
        }
+       return 0;
 }
 
 /* Drop the inode semaphore and wait for a pipe event, atomically */
@@ -125,6 +154,24 @@ void pipe_wait(struct pipe_inode_info *pipe)
        pipe_lock(pipe);
 }
 
+int pipe_wait_killable(struct pipe_inode_info *pipe)
+{
+       DEFINE_WAIT(rdwait);
+       DEFINE_WAIT(wrwait);
+
+       /*
+        * Pipes are system-local resources, so sleeping on them
+        * is considered a noninteractive wait:
+        */
+       prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
+       prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
+       pipe_unlock(pipe);
+       schedule();
+       finish_wait(&pipe->rd_wait, &rdwait);
+       finish_wait(&pipe->wr_wait, &wrwait);
+       return pipe_lock_killable(pipe);
+}
+
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
                                  struct pipe_buffer *buf)
 {
@@ -244,7 +291,8 @@ static inline bool pipe_readable(const struct 
pipe_inode_info *pipe)
                return 0;
 
        ret = 0;
-       __pipe_lock(pipe);
+       if (__pipe_lock_killable(pipe))
+               return -ERESTARTSYS;
 
        /*
         * We only wake up writers if the pipe was full when we started
@@ -381,7 +429,8 @@ static inline bool pipe_readable(const struct 
pipe_inode_info *pipe)
                if (wait_event_interruptible_exclusive(pipe->rd_wait, 
pipe_readable(pipe)) < 0)
                        return -ERESTARTSYS;
 
-               __pipe_lock(pipe);
+               if (__pipe_lock_killable(pipe))
+                       return -ERESTARTSYS;
                was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
                wake_next_reader = true;
        }
@@ -432,7 +481,8 @@ static inline bool pipe_writable(const struct 
pipe_inode_info *pipe)
        if (unlikely(total_len == 0))
                return 0;
 
-       __pipe_lock(pipe);
+       if (__pipe_lock_killable(pipe))
+               return -ERESTARTSYS;
 
        if (!pipe->readers) {
                send_sig(SIGPIPE, current, 0);
@@ -577,7 +627,14 @@ static inline bool pipe_writable(const struct 
pipe_inode_info *pipe)
                        kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
                }
                wait_event_interruptible_exclusive(pipe->wr_wait, 
pipe_writable(pipe));
-               __pipe_lock(pipe);
+               if (__pipe_lock_killable(pipe)) {
+                       if (!ret)
+                               ret = -ERESTARTSYS;
+                       /* Extra notification is better than missing 
notification? */
+                       was_empty = true;
+                       wake_next_writer = true;
+                       goto out_unlocked;
+               }
                was_empty = pipe_empty(pipe->head, pipe->tail);
                wake_next_writer = true;
        }
@@ -586,6 +643,7 @@ static inline bool pipe_writable(const struct 
pipe_inode_info *pipe)
                wake_next_writer = false;
        __pipe_unlock(pipe);
 
+out_unlocked:
        /*
         * If we do do a wakeup event, we do a 'sync' wakeup, because we
         * want the reader to start processing things asap, rather than
@@ -617,7 +675,8 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg)
 
        switch (cmd) {
        case FIONREAD:
-               __pipe_lock(pipe);
+               if (__pipe_lock_killable(pipe))
+                       return -ERESTARTSYS;
                count = 0;
                head = pipe->head;
                tail = pipe->tail;
@@ -634,7 +693,8 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg)
 #ifdef CONFIG_WATCH_QUEUE
        case IOC_WATCH_QUEUE_SET_SIZE: {
                int ret;
-               __pipe_lock(pipe);
+               if (__pipe_lock_killable(pipe))
+                       return -ERESTARTSYS;
                ret = watch_queue_set_size(pipe, arg);
                __pipe_unlock(pipe);
                return ret;
@@ -719,6 +779,10 @@ static void put_pipe_info(struct inode *inode, struct 
pipe_inode_info *pipe)
 {
        struct pipe_inode_info *pipe = file->private_data;
 
+       /*
+        * Think lock can't be killable. How to avoid deadlock if page fault
+        * with pipe mutex held does not finish quickly?
+        */
        __pipe_lock(pipe);
        if (file->f_mode & FMODE_READ)
                pipe->readers--;
@@ -744,7 +808,8 @@ static void put_pipe_info(struct inode *inode, struct 
pipe_inode_info *pipe)
        struct pipe_inode_info *pipe = filp->private_data;
        int retval = 0;
 
-       __pipe_lock(pipe);
+       if (__pipe_lock_killable(pipe)) /* Can this be safely killable? */
+               return -ERESTARTSYS;
        if (filp->f_mode & FMODE_READ)
                retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
        if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
@@ -1040,7 +1105,8 @@ static int wait_for_partner(struct pipe_inode_info *pipe, 
unsigned int *cnt)
        int cur = *cnt;
 
        while (cur == *cnt) {
-               pipe_wait(pipe);
+               if (pipe_wait_killable(pipe))
+                       break;
                if (signal_pending(current))
                        break;
        }
@@ -1083,10 +1149,13 @@ static int fifo_open(struct inode *inode, struct file 
*filp)
                        spin_unlock(&inode->i_lock);
                }
        }
-       filp->private_data = pipe;
-       /* OK, we have a pipe and it's pinned down */
 
-       __pipe_lock(pipe);
+       /* OK, we have a pipe and it's pinned down */
+       if (__pipe_lock_killable(pipe)) {
+               put_pipe_info(inode, pipe);
+               return -ERESTARTSYS;
+       }
+       filp->private_data = pipe;
 
        /* We can only do regular read/write on fifos */
        stream_open(inode, filp);
@@ -1349,7 +1418,8 @@ long pipe_fcntl(struct file *file, unsigned int cmd, 
unsigned long arg)
        if (!pipe)
                return -EBADF;
 
-       __pipe_lock(pipe);
+       if (__pipe_lock_killable(pipe))
+               return -ERESTARTSYS;
 
        switch (cmd) {
        case F_SETPIPE_SZ:
diff --git a/fs/splice.c b/fs/splice.c
index d7c8a7c..65d24df 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -563,7 +563,8 @@ static int splice_from_pipe_next(struct pipe_inode_info 
*pipe, struct splice_des
                        sd->need_wakeup = false;
                }
 
-               pipe_wait(pipe);
+               if (pipe_wait_killable(pipe))
+                       return -ERESTARTSYS;
        }
 
        return 1;
@@ -657,7 +658,8 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, 
struct file *out,
                .u.file = out,
        };
 
-       pipe_lock(pipe);
+       if (pipe_lock_killable(pipe))
+               return -ERESTARTSYS;
        ret = __splice_from_pipe(pipe, &sd, actor);
        pipe_unlock(pipe);
 
@@ -696,7 +698,10 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, 
struct file *out,
        if (unlikely(!array))
                return -ENOMEM;
 
-       pipe_lock(pipe);
+       if (pipe_lock_killable(pipe)) {
+               kfree(array);
+               return -ERESTARTSYS;
+       }
 
        splice_from_pipe_begin(&sd);
        while (sd.total_len) {
@@ -1077,7 +1082,8 @@ static int wait_for_space(struct pipe_inode_info *pipe, 
unsigned flags)
                        return -EAGAIN;
                if (signal_pending(current))
                        return -ERESTARTSYS;
-               pipe_wait(pipe);
+               if (pipe_wait_killable(pipe))
+                       return -ERESTARTSYS;
        }
 }
 
@@ -1167,7 +1173,8 @@ long do_splice(struct file *in, loff_t __user *off_in,
                if (out->f_flags & O_NONBLOCK)
                        flags |= SPLICE_F_NONBLOCK;
 
-               pipe_lock(opipe);
+               if (pipe_lock_killable(opipe))
+                       return -ERESTARTSYS;
                ret = wait_for_space(opipe, flags);
                if (!ret) {
                        unsigned int p_space;
@@ -1264,7 +1271,8 @@ static long vmsplice_to_user(struct file *file, struct 
iov_iter *iter,
                return -EBADF;
 
        if (sd.total_len) {
-               pipe_lock(pipe);
+               if (pipe_lock_killable(pipe))
+                       return -ERESTARTSYS;
                ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
                pipe_unlock(pipe);
        }
@@ -1291,7 +1299,8 @@ static long vmsplice_to_pipe(struct file *file, struct 
iov_iter *iter,
        if (!pipe)
                return -EBADF;
 
-       pipe_lock(pipe);
+       if (pipe_lock_killable(pipe))
+               return -ERESTARTSYS;
        ret = wait_for_space(pipe, flags);
        if (!ret)
                ret = iter_to_pipe(iter, pipe, buf_flag);
@@ -1441,7 +1450,8 @@ static int ipipe_prep(struct pipe_inode_info *pipe, 
unsigned int flags)
                return 0;
 
        ret = 0;
-       pipe_lock(pipe);
+       if (pipe_lock_killable(pipe))
+               return -ERESTARTSYS;
 
        while (pipe_empty(pipe->head, pipe->tail)) {
                if (signal_pending(current)) {
@@ -1454,7 +1464,8 @@ static int ipipe_prep(struct pipe_inode_info *pipe, 
unsigned int flags)
                        ret = -EAGAIN;
                        break;
                }
-               pipe_wait(pipe);
+               if (pipe_wait_killable(pipe))
+                       return -ERESTARTSYS;
        }
 
        pipe_unlock(pipe);
@@ -1477,7 +1488,8 @@ static int opipe_prep(struct pipe_inode_info *pipe, 
unsigned int flags)
                return 0;
 
        ret = 0;
-       pipe_lock(pipe);
+       if (pipe_lock_killable(pipe))
+               return -ERESTARTSYS;
 
        while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
                if (!pipe->readers) {
@@ -1493,7 +1505,8 @@ static int opipe_prep(struct pipe_inode_info *pipe, 
unsigned int flags)
                        ret = -ERESTARTSYS;
                        break;
                }
-               pipe_wait(pipe);
+               if (pipe_wait_killable(pipe))
+                       return -ERESTARTSYS;
        }
 
        pipe_unlock(pipe);
@@ -1529,7 +1542,8 @@ static int splice_pipe_to_pipe(struct pipe_inode_info 
*ipipe,
         * grabbing by pipe info address. Otherwise two different processes
         * could deadlock (one doing tee from A -> B, the other from B -> A).
         */
-       pipe_double_lock(ipipe, opipe);
+       if (pipe_double_lock_killable(ipipe, opipe))
+               return -ERESTARTSYS;
 
        i_tail = ipipe->tail;
        i_mask = ipipe->ring_size - 1;
@@ -1655,7 +1669,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
         * grabbing by pipe info address. Otherwise two different processes
         * could deadlock (one doing tee from A -> B, the other from B -> A).
         */
-       pipe_double_lock(ipipe, opipe);
+       if (pipe_double_lock_killable(ipipe, opipe))
+               return -ERESTARTSYS;
 
        i_tail = ipipe->tail;
        i_mask = ipipe->ring_size - 1;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 50afd0d..eb99c18 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -233,8 +233,10 @@ static inline bool pipe_buf_try_steal(struct 
pipe_inode_info *pipe,
 
 /* Pipe lock and unlock operations */
 void pipe_lock(struct pipe_inode_info *);
+int __must_check pipe_lock_killable(struct pipe_inode_info *pipe);
 void pipe_unlock(struct pipe_inode_info *);
-void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
+int __must_check pipe_double_lock_killable(struct pipe_inode_info *pipe1,
+                                          struct pipe_inode_info *pipe2);
 
 extern unsigned int pipe_max_size;
 extern unsigned long pipe_user_pages_hard;
@@ -242,6 +244,7 @@ static inline bool pipe_buf_try_steal(struct 
pipe_inode_info *pipe,
 
 /* Drop the inode semaphore and wait for a pipe event, atomically */
 void pipe_wait(struct pipe_inode_info *pipe);
+int __must_check pipe_wait_killable(struct pipe_inode_info *pipe);
 
 struct pipe_inode_info *alloc_pipe_info(void);
 void free_pipe_info(struct pipe_inode_info *);
-- 
1.8.3.1

Reply via email to