В Чт, 02/10/2014 в 11:34 +0200, Peter Zijlstra пишет:
> On Wed, Oct 01, 2014 at 01:04:22AM +0400, Kirill Tkhai wrote:
> > From: Kirill Tkhai <ktk...@parallels.com>
> > 
> > hrtimer_try_to_cancel() may bring a suprise, its call may fail.
> 
> Well, not really a surprise that, its a _try_ operation after all.
> 
> > raw_spin_lock(&rq->lock)
> > ...                            dl_task_timer                 
> > raw_spin_lock(&rq->lock)
> > ...                               raw_spin_lock(&rq->lock)   ...
> >    switched_from_dl()             ...                        ...
> >       hrtimer_try_to_cancel()     ...                        ...
> >    switched_to_fair()             ...                        ...
> > ...                               ...                        ...
> > ...                               ...                        ...
> > raw_spin_unlock(&rq->lock)        ...                        (asquired)
> > ...                               ...                        ...
> > ...                               ...                        ...
> > do_exit()                         ...                        ...
> >    schedule()                     ...                        ...
> >       raw_spin_lock(&rq->lock)    ...                        
> > raw_spin_unlock(&rq->lock)
> >       ...                         ...                        ...
> >       raw_spin_unlock(&rq->lock)  ...                        
> > raw_spin_lock(&rq->lock)
> >       ...                         ...                        (asquired)
> >       put_task_struct()           ...                        ...
> >           free_task_struct()      ...                        ...
> >       ...                         ...                        
> > raw_spin_unlock(&rq->lock)
> > ...                               (asquired)                 ...
> > ...                               ...                        ...
> > ...                               Surprise!!!                ...
> > 
> > So, let's implement 100% guaranteed way to cancel the timer and let's
> > be sure we are safe even in very unlikely situations.
> > 
> > We do not create any problem with rq unlocking, because it already
> > may happed below in pull_dl_task(). No problem with deadline tasks
> > balancing too.
> 
> That doesn't sound right. pull_dl_task() is an entirely different
> callchain than switched_from(). Now it might still be fine, but you
> cannot compare it with pull_dl_task.

I mean that the caller of switched_from_dl() already knows about this situation
(rq->lock may be dropped), so we do not limit the area of its use.

Does this sound better?

[PATCH] sched/dl: Implement cancel_dl_timer() to use in switched_from_dl()
    
The currently used hrtimer_try_to_cancel() is racy:

raw_spin_lock(&rq->lock)
...                            dl_task_timer                 
raw_spin_lock(&rq->lock)
...                               raw_spin_lock(&rq->lock)   ...
   switched_from_dl()             ...                        ...
      hrtimer_try_to_cancel()     ...                        ...
   switched_to_fair()             ...                        ...
...                               ...                        ...
...                               ...                        ...
raw_spin_unlock(&rq->lock)        ...                        (acquired)
...                               ...                        ...
...                               ...                        ...
do_exit()                         ...                        ...
   schedule()                     ...                        ...
      raw_spin_lock(&rq->lock)    ...                        
raw_spin_unlock(&rq->lock)
      ...                         ...                        ...
      raw_spin_unlock(&rq->lock)  ...                        
raw_spin_lock(&rq->lock)
      ...                         ...                        (acquired)
      put_task_struct()           ...                        ...
          free_task_struct()      ...                        ...
      ...                         ...                        
raw_spin_unlock(&rq->lock)
...                               (acquired)                 ...
...                               ...                        ...
...                               (use after free)           ...

    
So, let's implement a 100% guaranteed way to cancel the timer, and let's
be sure we are safe even in very unlikely situations.

Unlocking the rq does not limit where switched_from_dl() can be used, because
the lock could already be dropped in pull_dl_task() below.

Signed-off-by: Kirill Tkhai <ktk...@parallels.com>

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index abfaf3d..63f8b4a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -555,11 +555,6 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 {
        struct hrtimer *timer = &dl_se->dl_timer;
 
-       if (hrtimer_active(timer)) {
-               hrtimer_try_to_cancel(timer);
-               return;
-       }
-
        hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        timer->function = dl_task_timer;
 }
@@ -1567,10 +1562,34 @@ void init_sched_dl_class(void)
 
 #endif /* CONFIG_SMP */
 
+/*
+ *  Surely cancel task's dl_timer. May drop rq->lock.
+ */
+static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
+{
+       struct hrtimer *dl_timer = &p->dl.dl_timer;
+
+       /* Nobody will change task's class if pi_lock is held */
+       lockdep_assert_held(&p->pi_lock);
+
+       if (hrtimer_active(dl_timer)) {
+               int ret = hrtimer_try_to_cancel(dl_timer);
+
+               if (unlikely(ret == -1)) {
+                       /*
+                        * Note, p may migrate OR new deadline tasks
+                        * may appear in rq when we are unlocking it.
+                        */
+                       raw_spin_unlock(&rq->lock);
+                       hrtimer_cancel(dl_timer);
+                       raw_spin_lock(&rq->lock);
+               }
+       }
+}
+
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
-       if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))
-               hrtimer_try_to_cancel(&p->dl.dl_timer);
+       cancel_dl_timer(rq, p);
 
        __dl_clear_params(p);
 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to