On Fri, 12 Oct 2007, Peter Zijlstra wrote:

>
> On Fri, 2007-10-12 at 12:05 -0400, Steven Rostedt wrote:
>
> > Index: linux-2.6.23-rt1/kernel/sched.c
> > ===================================================================
> > --- linux-2.6.23-rt1.orig/kernel/sched.c
> > +++ linux-2.6.23-rt1/kernel/sched.c
> > @@ -304,6 +304,7 @@ struct rq {
> >  #ifdef CONFIG_PREEMPT_RT
> >     unsigned long rt_nr_running;
> >     unsigned long rt_nr_uninterruptible;
> > +   int curr_prio;
>
> still not convinced we want this PREEMPT_RT only.

Neither am I, but I want testing first ;-)
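
If we do end up dropping the PREEMPT_RT dependency, it is really just a
matter of hoisting the field out of that block, roughly (sketch only,
placement is illustrative):

	#ifdef CONFIG_PREEMPT_RT
		unsigned long rt_nr_running;
		unsigned long rt_nr_uninterruptible;
	#endif
		/* prio of rq->curr, kept for all SMP configs */
		int curr_prio;

(and the update in finish_task_switch() would move under plain
CONFIG_SMP as well).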

>
> >  #endif
> >
> >     unsigned long switch_timestamp;
> > @@ -1484,6 +1485,126 @@ next_in_queue:
> >
> >  static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
> >
> > +/* Only try this algorithm three times */
> > +#define RT_PUSH_MAX_TRIES 3
> > +
> > +/* Will lock the rq it finds */
> > +static int find_lowest_cpu(cpumask_t *cpu_mask, struct task_struct *task,
> > +                            struct rq *this_rq)
> > +{
>
> Eeew, asymmetric locking. At the very least name it:
> find_lock_lowest_rq() or something like that.

Hehe, I originally had that name (well, without the lock). But I need
both the found cpu and the rq, so instead of passing in a pointer for
dst_cpu I just returned the cpu, since it was simple to do a cpu_rq()
on it. But if you find this too grotesque, I can just return the cpu
via a pointer.
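
To make it concrete, the variant you are describing would look roughly
like this (just a sketch; the scan and double_lock_balance() retry loop
stays exactly as in the patch, and cpu_of() is simply rq->cpu on SMP):

	/* Will lock the rq it finds, returns NULL if none suitable */
	static struct rq *find_lock_lowest_rq(cpumask_t *cpu_mask,
					      struct task_struct *task,
					      struct rq *this_rq)
	{
		struct rq *lowest_rq = NULL;

		/* ... same per-cpu scan and retry loop as above ... */

		return lowest_rq;
	}

with the caller doing:

	lowest_rq = find_lock_lowest_rq(&cpu_mask, next_task, this_rq);
	if (!lowest_rq)
		goto out;
	dst_cpu = cpu_of(lowest_rq);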


>
> Might as well return struct rq* while we're at it [1].

That's what I originally did.

>
> > +   struct rq *lowest_rq = NULL;
> > +   int dst_cpu = -1;
> > +   int cpu;
> > +   int tries;
> > +
> > +   for (tries = 0; tries < RT_PUSH_MAX_TRIES; tries++) {
> > +           /*
> > +            * Scan each rq for the lowest prio.
> > +            */
> > +           for_each_cpu_mask(cpu, *cpu_mask) {
> > +                   struct rq *rq = &per_cpu(runqueues, cpu);
> > +
> > +                   if (cpu == smp_processor_id())
> > +                           continue;
> > +
> > +                   /* We look for lowest RT prio or non-rt CPU */
> > +                   if (rq->curr_prio >= MAX_RT_PRIO) {
> > +                           lowest_rq = rq;
> > +                           dst_cpu = cpu;
> > +                           break;
> > +                   }
> > +
> > +                   /* no locking for now */
> > +                   if (rq->curr_prio > task->prio &&
> > +                       (!lowest_rq || rq->curr_prio < lowest_rq->curr_prio)) {
> > +                           dst_cpu = cpu;
> > +                           lowest_rq = rq;
> > +                   }
> > +           }
> > +
> > +           if (!lowest_rq) {
> > +                   dst_cpu = -1;
> > +                   break;
> > +           }
> > +
> > +           /* if the prio of this runqueue changed, try again */
> > +           if (double_lock_balance(this_rq, lowest_rq)) {
> > +                   /*
> > +                    * We had to unlock the run queue. In
> > +                    * the mean time, task could have
> > +                    * migrated already or had its affinity changed.
> > +                    */
> > +                   if (unlikely(task_rq(task) != this_rq ||
> > +                                !cpu_isset(dst_cpu, task->cpus_allowed))) {
> > +                           spin_unlock(&lowest_rq->lock);
> > +                           dst_cpu = -1;
> > +                           lowest_rq = NULL;
> > +                           break;
> > +                   }
> > +
> > +           }
> > +
> > +           /* If this rq is still suitable use it. */
> > +           if (lowest_rq->curr_prio > task->prio)
> > +                   break;
> > +
> > +           /* try again */
> > +           spin_unlock(&lowest_rq->lock);
> > +           lowest_rq = NULL;
> > +           dst_cpu = -1;
> > +   }
> > +
> > +   return dst_cpu;
> > +}
> > +
> > +/*
> > + * If the current CPU has more than one RT task, see if the non-running
> > + * task can migrate over to a CPU that is running a task
> > + * of lesser priority.
> > + */
> > +static int push_rt_task(struct rq *this_rq)
> > +{
> > +   struct task_struct *next_task;
> > +   struct rq *lowest_rq;
> > +   int dst_cpu;
> > +   int ret = 0;
> > +   cpumask_t cpu_mask;
> > +
> > +   assert_spin_locked(&this_rq->lock);
> > +
> > +   next_task = rt_next_highest_task(this_rq);
> > +   if (!next_task)
> > +           return 0;
> > +
> > +   cpus_and(cpu_mask, cpu_online_map, next_task->cpus_allowed);
> > +
> > +   /* We might release this_rq lock */
> > +   get_task_struct(next_task);
> > +
> > +   /* find_lowest_rq locks cpu_rq(dst_cpu) if found */
> > +   dst_cpu = find_lowest_cpu(&cpu_mask, next_task, this_rq);
> > +   if (dst_cpu < 0)
> > +           goto out;
> > +
> > +   lowest_rq = cpu_rq(dst_cpu);
> > +
>
> [1] Because that is what we use anyway.
>
> > +   assert_spin_locked(&lowest_rq->lock);
> > +
> > +   deactivate_task(this_rq, next_task, 0);
> > +   set_task_cpu(next_task, dst_cpu);

And here is where I need dst_cpu. Do you prefer that I pass the cpu
back via a pointer, or just use lowest_rq->cpu? I guess that would
work too.
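
Concretely, with the rq-pointer version the tail of push_rt_task()
could drop dst_cpu altogether, something like (sketch, SMP assumed so
rq->cpu is there):

	deactivate_task(this_rq, next_task, 0);
	set_task_cpu(next_task, lowest_rq->cpu);
	activate_task(lowest_rq, next_task, 0);

	resched_task(lowest_rq->curr);

	spin_unlock(&lowest_rq->lock);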

> > +   activate_task(lowest_rq, next_task, 0);
> > +
> > +   resched_task(lowest_rq->curr);
> > +
> > +   spin_unlock(&lowest_rq->lock);
> > +
> > +   ret = 1;
> > +out:
> > +   put_task_struct(next_task);
> > +
> > +   return ret;
> > +}
> > +
> >  /*
> >   * Pull RT tasks from other CPUs in the RT-overload
> >   * case. Interrupts are disabled, local rq is locked.
> > @@ -2202,19 +2323,28 @@ static inline void finish_task_switch(st
> >      * be dropped twice.
> >      *              Manfred Spraul <[EMAIL PROTECTED]>
> >      */
> > +   prev_state = prev->state;
> > +   _finish_arch_switch(prev);
> > +#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
> > +   rq->curr_prio = current->prio;
> > +#endif
> > +   finish_lock_switch(rq, prev);
> >  #if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
> >     /*
> >      * If we pushed an RT task off the runqueue,
> > -    * then kick other CPUs, they might run it:
> > -    */
> > -   if (unlikely(rt_task(current) && rq->rt_nr_running > 1)) {
> > -           schedstat_inc(rq, rto_schedule);
> > -           smp_send_reschedule_allbutself_cpumask(current->cpus_allowed);
> > +    * then kick other CPUs, they might run it.
> > +    * Note we may release the rq lock, and since
> > +    * the lock was owned by prev, we need to release it
> > +    * first via finish_lock_switch and then reacquire it.
> > +    */
> > +   if (unlikely(rt_task(current))) {
> > +           spin_lock(&rq->lock);
> > +           /* push_rt_task will return true if it moved an RT */
> > +           while (push_rt_task(rq))
> > +                   ;
> > +           spin_unlock(&rq->lock);
> >     }
> >  #endif
> > -   prev_state = prev->state;
> > -   _finish_arch_switch(prev);
> > -   finish_lock_switch(rq, prev);
>
> finish_lock_switch() seems to do spin_unlock_irq() in the tree I'm
> looking at, which means you just 'forgot' to re-enable IRQs.

In my tree (2.6.23-rt1) it is just a spin_unlock().
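
For reference, the difference is just the tail of finish_lock_switch();
roughly (from memory, so treat it as a sketch):

	/* mainline 2.6.23 ends finish_lock_switch() with: */
	spin_unlock_irq(&rq->lock);	/* re-enables IRQs */

	/* 2.6.23-rt1 ends it with: */
	spin_unlock(&rq->lock);		/* leaves the IRQ state alone */

So in -rt1 the push loop just takes and drops rq->lock without touching
the IRQ state. If this goes toward mainline, that part will need another
look.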

-- Steve