Hi, Jeff,

On 17.04.2018 14:42, Jeff Layton wrote:
> On Thu, 2018-04-05 at 14:58 +0300, Kirill Tkhai wrote:
>> I observed the following deadlock between them:
>>
>> [task 1]                          [task 2]                         [task 3]
>> kill_fasync()                     mm_update_next_owner()           
>> copy_process()
>>  spin_lock_irqsave(&fa->fa_lock)   read_lock(&tasklist_lock)        
>> write_lock_irq(&tasklist_lock)
>>   send_sigio()                    <IRQ>                             ...
>>    read_lock(&fown->lock)         kill_fasync()                     ...
>>     read_lock(&tasklist_lock)      spin_lock_irqsave(&fa->fa_lock)  ...
>>
>> Task 1 can't acquire read locked tasklist_lock, since there is
>> already task 3 expressed its wish to take the lock exclusive.
>> Task 2 holds the read locked lock, but it can't take the spin lock.
>>
>> Also, there is possible another deadlock (which I haven't observed):
>>
>> [task 1]                            [task 2]
>> f_getown()                          kill_fasync()
>>  read_lock(&f_own->lock)             spin_lock_irqsave(&fa->fa_lock,)
>>  <IRQ>                               send_sigio()                     
>> write_lock_irq(&f_own->lock)
>>   kill_fasync()                       read_lock(&fown->lock)
>>    spin_lock_irqsave(&fa->fa_lock,)
>>
>> Actually, we do not need exclusive fa->fa_lock in kill_fasync_rcu(),
>> as it guarantees fa->fa_file->f_owner integrity only. It may seem,
>> that it used to give a task a small possibility to receive two sequential
>> signals, if there are two parallel kill_fasync() callers, and task
>> handles the first signal fastly, but the behaviour won't become
>> different, since there is exclusive sighand lock in do_send_sig_info().
>>
>> The patch converts fa_lock into rwlock_t, and this fixes two above
>> deadlocks, as rwlock is allowed to be taken from interrupt handler
>> by qrwlock design.
>>
>> Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com>
>>
>> I used the following program for testing:
>>
>> #include <unistd.h>
>> #include <stdlib.h>
>> #include <signal.h>
>> #include <fcntl.h>
>> #include <errno.h>
>> #include <stdio.h>
>>
>> #ifndef F_SETSIG
>> #define F_SETSIG 10
>> #endif
>>
>> void handler(int sig)
>> {
>> }
>>
>> main()
>> {
>>      unsigned int flags;
>>      int fd;
>>
>>      system("echo 8 > /proc/sys/kernel/random/read_wakeup_threshold");
>>      system("while :; do ls -R / > /dev/random 2>&1 ; echo 3 > 
>> /proc/sys/vm/drop_caches; done &");
>>
>>      if (signal(SIGINT, handler) < 0) {
>>              perror("Signal");
>>              exit(1);
>>      }
>>
>>      fd = open("/dev/random", O_RDWR);
>>      if (fd < 0) {
>>              perror("Can't open");
>>              exit(1);
>>      }
>>
>>      flags = FASYNC | fcntl(fd, F_GETFL);
>>      if (fcntl(fd, F_SETFL, flags) < 0) {
>>              perror("Setfl");
>>              exit(1);
>>      }
>>      if (fcntl(fd, F_SETOWN, getpid())) {
>>              perror("Setown");
>>              exit(1);
>>      }
>>      if (fcntl(fd, F_SETSIG, SIGINT)) {
>>              perror("Setsig");
>>              exit(1);
>>      }
>>
>>      while (1)
>>              sleep(100);
>> }
>> ---
>>  fs/fcntl.c         |   15 +++++++--------
>>  include/linux/fs.h |    2 +-
>>  2 files changed, 8 insertions(+), 9 deletions(-)
>>
>> diff --git a/fs/fcntl.c b/fs/fcntl.c
>> index 1e97f1fda90c..780161a11f9d 100644
>> --- a/fs/fcntl.c
>> +++ b/fs/fcntl.c
>> @@ -865,9 +865,9 @@ int fasync_remove_entry(struct file *filp, struct 
>> fasync_struct **fapp)
>>              if (fa->fa_file != filp)
>>                      continue;
>>  
>> -            spin_lock_irq(&fa->fa_lock);
>> +            write_lock_irq(&fa->fa_lock);
>>              fa->fa_file = NULL;
>> -            spin_unlock_irq(&fa->fa_lock);
>> +            write_unlock_irq(&fa->fa_lock);
>>  
>>              *fp = fa->fa_next;
>>              call_rcu(&fa->fa_rcu, fasync_free_rcu);
>> @@ -912,13 +912,13 @@ struct fasync_struct *fasync_insert_entry(int fd, 
>> struct file *filp, struct fasy
>>              if (fa->fa_file != filp)
>>                      continue;
>>  
>> -            spin_lock_irq(&fa->fa_lock);
>> +            write_lock_irq(&fa->fa_lock);
>>              fa->fa_fd = fd;
>> -            spin_unlock_irq(&fa->fa_lock);
>> +            write_unlock_irq(&fa->fa_lock);
>>              goto out;
>>      }
>>  
>> -    spin_lock_init(&new->fa_lock);
>> +    rwlock_init(&new->fa_lock);
>>      new->magic = FASYNC_MAGIC;
>>      new->fa_file = filp;
>>      new->fa_fd = fd;
>> @@ -981,14 +981,13 @@ static void kill_fasync_rcu(struct fasync_struct *fa, 
>> int sig, int band)
>>  {
>>      while (fa) {
>>              struct fown_struct *fown;
>> -            unsigned long flags;
>>  
>>              if (fa->magic != FASYNC_MAGIC) {
>>                      printk(KERN_ERR "kill_fasync: bad magic number in "
>>                             "fasync_struct!\n");
>>                      return;
>>              }
>> -            spin_lock_irqsave(&fa->fa_lock, flags);
>> +            read_lock(&fa->fa_lock);
> 
> Does this need to be read_lock_irq? Why is it ok to allow interrupts
> here?

Read locked rwlock can be taken for reading from IRQ once again even
if there is a writer pending, while spin lock can't:

void queued_read_lock_slowpath(struct qrwlock *lock)
{
        /*
         * Readers come here when they cannot get the lock without waiting
         */
        if (unlikely(in_interrupt())) {
                /*
                 * Readers in interrupt context will get the lock immediately
                 * if the writer is just waiting (not holding the lock yet),
                 * so spin with ACQUIRE semantics until the lock is available
                 * without waiting in the queue.
                 */
                atomic_cond_read_acquire(&lock->cnts, !(VAL & _QW_LOCKED));
                return;
        }

So, when we replace spinlock with read_lock(), we don't need disable IRQs 
anymore.
All we need is to make write_lock always disable IRQs.

>>              if (fa->fa_file) {
>>                      fown = &fa->fa_file->f_owner;
>>                      /* Don't send SIGURG to processes which have not set a
>> @@ -997,7 +996,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, 
>> int sig, int band)
>>                      if (!(sig == SIGURG && fown->signum == 0))
>>                              send_sigio(fown, fa->fa_fd, band);
>>              }
>> -            spin_unlock_irqrestore(&fa->fa_lock, flags);
>> +            read_unlock(&fa->fa_lock);
>>              fa = rcu_dereference(fa->fa_next);
>>      }
>>  }
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index c6baf767619e..297e2dcd9dd2 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -1250,7 +1250,7 @@ static inline int locks_lock_file_wait(struct file 
>> *filp, struct file_lock *fl)
>>  }
>>  
>>  struct fasync_struct {
>> -    spinlock_t              fa_lock;
>> +    rwlock_t                fa_lock;
>>      int                     magic;
>>      int                     fa_fd;
>>      struct fasync_struct    *fa_next; /* singly linked list */
>>
> 
> I've no objection to the patch in principle, but I'm not as familiar
> with the fasync code as others here.

I took the reviewers list from MAINTAINERS and ./scripts/get_maintainer.pl,
don't have an ideas what else should be CCed.

Thanks,
Kirill

Reply via email to