Oleg,

On Mon, 2005-08-22 at 12:52 +0400, Oleg Nesterov wrote:
> Thomas Gleixner wrote:
> I think yes, and this is closely related to your and Paul
> "Use RCU to protect tasklist for unicast signals" patches.
> 
> We don't need RCU here, but we do need this hypothetical
> lock_task_sighand() (and __exit_sighand from __exit_signal
> change).
> 
> We still need tasklist_lock in send_group_queue for
> __group_complete_signal though. I hope Paul will solve
> this, it is needed for group_send_info() anyway.
> 
> But for the near future I don't see simple and non-intrusive
> fix for this deadlock. I am waiting for George to confirm
> that the bug exists and we are not crazy :)

It exists. It triggers on preempt-RT and I can trigger it on vanilla SMP
by waiting for the timer expiry in release_task() before the
__exit_signal() call. That's reasonable, as it can happen that way by
chance too. It requires that the timer expires on a different CPU, but I
don't see a reason why this should not happen.

The patch below solves it for me. 

tglx

diff -uprN --exclude-from=/usr/local/bin/diffit.exclude 
linux-2.6.13-rc6/kernel/exit.c linux-2.6.13-rc6.signal/kernel/exit.c
--- linux-2.6.13-rc6/kernel/exit.c      2005-08-13 12:25:58.000000000 +0200
+++ linux-2.6.13-rc6.signal/kernel/exit.c       2005-08-22 11:19:02.000000000 
+0200
@@ -71,7 +71,6 @@ repeat: 
                __ptrace_unlink(p);
        BUG_ON(!list_empty(&p->ptrace_list) || 
!list_empty(&p->ptrace_children));
        __exit_signal(p);
-       __exit_sighand(p);
        /*
         * Note that the fastpath in sys_times depends on __exit_signal having
         * updated the counters before a task is removed from the tasklist of
diff -uprN --exclude-from=/usr/local/bin/diffit.exclude 
linux-2.6.13-rc6/kernel/fork.c linux-2.6.13-rc6.signal/kernel/fork.c
--- linux-2.6.13-rc6/kernel/fork.c      2005-08-13 12:25:58.000000000 +0200
+++ linux-2.6.13-rc6.signal/kernel/fork.c       2005-08-22 11:35:05.000000000 
+0200
@@ -1132,7 +1132,8 @@ bad_fork_cleanup_mm:
 bad_fork_cleanup_signal:
        exit_signal(p);
 bad_fork_cleanup_sighand:
-       exit_sighand(p);
+       if (p->sighand)
+               exit_sighand(p);
 bad_fork_cleanup_fs:
        exit_fs(p); /* blocking */
 bad_fork_cleanup_files:
diff -uprN --exclude-from=/usr/local/bin/diffit.exclude 
linux-2.6.13-rc6/kernel/posix-timers.c 
linux-2.6.13-rc6.signal/kernel/posix-timers.c
--- linux-2.6.13-rc6/kernel/posix-timers.c      2005-08-13 12:25:58.000000000 
+0200
+++ linux-2.6.13-rc6.signal/kernel/posix-timers.c       2005-08-21 
23:19:42.000000000 +0200
@@ -427,21 +427,23 @@ int posix_timer_event(struct k_itimer *t
        timr->sigq->info.si_code = SI_TIMER;
        timr->sigq->info.si_tid = timr->it_id;
        timr->sigq->info.si_value = timr->it_sigev_value;
+
        if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
-               if (unlikely(timr->it_process->flags & PF_EXITING)) {
-                       timr->it_sigev_notify = SIGEV_SIGNAL;
-                       put_task_struct(timr->it_process);
-                       timr->it_process = timr->it_process->group_leader;
-                       goto group;
-               }
-               return send_sigqueue(timr->it_sigev_signo, timr->sigq,
-                       timr->it_process);
-       }
-       else {
-       group:
-               return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
-                       timr->it_process);
+               struct task_struct *leader;
+               int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
+                                       timr->it_process);
+
+               if (likely(ret >= 0))
+                       return ret;
+
+               timr->it_sigev_notify = SIGEV_SIGNAL;
+               leader = timr->it_process->group_leader;
+               put_task_struct(timr->it_process);
+               timr->it_process = leader;
        }
+
+       return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
+                                  timr->it_process);
 }
 EXPORT_SYMBOL_GPL(posix_timer_event);
 
@@ -499,7 +501,8 @@ static void posix_timer_fn(unsigned long
                        remove_from_abslist(timr);
                }
 
-               if (posix_timer_event(timr, si_private))
+               /* Do not rearm the timer, when we are exiting */
+               if (posix_timer_event(timr, si_private) > 0)
                        /*
                         * signal was not sent because of sig_ignor
                         * we will not get a call back to restart it AND
diff -uprN --exclude-from=/usr/local/bin/diffit.exclude 
linux-2.6.13-rc6/kernel/signal.c linux-2.6.13-rc6.signal/kernel/signal.c
--- linux-2.6.13-rc6/kernel/signal.c    2005-08-13 12:25:58.000000000 +0200
+++ linux-2.6.13-rc6.signal/kernel/signal.c     2005-08-22 11:51:08.000000000 
+0200
@@ -395,6 +395,7 @@ void __exit_signal(struct task_struct *t
        }
        clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
        flush_sigqueue(&tsk->pending);
+       __exit_sighand(tsk);
        if (sig) {
                /*
                 * We are cleaning up the signal_struct here.  We delayed
@@ -1380,16 +1381,20 @@ send_sigqueue(int sig, struct sigqueue *
        unsigned long flags;
        int ret = 0;
 
-       /*
-        * We need the tasklist lock even for the specific
-        * thread case (when we don't need to follow the group
-        * lists) in order to avoid races with "p->sighand"
-        * going away or changing from under us.
-        */
        BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
-       read_lock(&tasklist_lock);  
+
+       while(!read_trylock(&tasklist_lock)) {
+               if (p->flags & PF_EXITING)
+                       return -1;
+               cpu_relax();
+       }
+       if (unlikely(p->flags & PF_EXITING)) {
+               ret = -1;
+               goto out_err;
+       }
+
        spin_lock_irqsave(&p->sighand->siglock, flags);
-       
+
        if (unlikely(!list_empty(&q->list))) {
                /*
                 * If an SI_TIMER entry is already queue just increment
@@ -1399,7 +1404,7 @@ send_sigqueue(int sig, struct sigqueue *
                        BUG();
                q->info.si_overrun++;
                goto out;
-       } 
+       }
        /* Short-circuit ignored signals.  */
        if (sig_ignored(p, sig)) {
                ret = 1;
@@ -1414,8 +1419,10 @@ send_sigqueue(int sig, struct sigqueue *
 
 out:
        spin_unlock_irqrestore(&p->sighand->siglock, flags);
+out_err:
        read_unlock(&tasklist_lock);
-       return(ret);
+
+       return ret;
 }
 
 int
@@ -1425,7 +1432,16 @@ send_group_sigqueue(int sig, struct sigq
        int ret = 0;
 
        BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
-       read_lock(&tasklist_lock);
+
+       while(!read_trylock(&tasklist_lock)) {
+               if (!p->sighand)
+                       return -1;
+               cpu_relax();
+       }
+       if (unlikely(!p->sighand)) {
+               ret = -1;
+               goto out_err;
+       }
        spin_lock_irqsave(&p->sighand->siglock, flags);
        handle_stop_signal(sig, p);
 
@@ -1459,8 +1475,9 @@ send_group_sigqueue(int sig, struct sigq
        __group_complete_signal(sig, p);
 out:
        spin_unlock_irqrestore(&p->sighand->siglock, flags);
+out_err:
        read_unlock(&tasklist_lock);
-       return(ret);
+       return ret;
 }
 
 /*


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to