Nick Piggin wrote:
Andrew Morton wrote:

Nick Piggin <[EMAIL PROTECTED]> wrote:


Andrew, IMO this is another bug to hold 2.6.11 for.



Sure. I wouldn't consider Bodo's patch to be the one to use though..


No. Something similar could be done that works on all architectures
and all wait_task_inactive callers (and is confined to sched.c). That
would still be more or less a hack to work around smtnice's unfortunate
locking though.


Something like the following (untested) extension of Bodo's work could be the minimal fix for 2.6.11. As I've said though, I'd consider it a hack and prefer to do something about the locking. That could be done after 2.6.11 though. Depends how you feel.

Bodo, I wonder if this looks like a suitable fix for your problem?


---

 linux-2.6-npiggin/include/linux/init_task.h |    1 +
 linux-2.6-npiggin/include/linux/sched.h     |    1 +
 linux-2.6-npiggin/kernel/sched.c            |   12 ++++++++++--
 3 files changed, 12 insertions(+), 2 deletions(-)

diff -puN include/linux/sched.h~sched-fixup-locking include/linux/sched.h
--- linux-2.6/include/linux/sched.h~sched-fixup-locking 2005-02-05 
15:24:00.000000000 +1100
+++ linux-2.6-npiggin/include/linux/sched.h     2005-02-05 15:24:39.000000000 
+1100
@@ -533,6 +533,7 @@ struct task_struct {
        unsigned long ptrace;
 
        int lock_depth;         /* Lock depth */
+       int on_cpu;             /* Is the task on the CPU, or in a ctxsw */
 
        int prio, static_prio;
        struct list_head run_list;
diff -puN kernel/sched.c~sched-fixup-locking kernel/sched.c
--- linux-2.6/kernel/sched.c~sched-fixup-locking        2005-02-05 
15:24:02.000000000 +1100
+++ linux-2.6-npiggin/kernel/sched.c    2005-02-05 15:34:37.000000000 +1100
@@ -294,7 +294,7 @@ static DEFINE_PER_CPU(struct runqueue, r
 #ifndef prepare_arch_switch
 # define prepare_arch_switch(rq, next) do { } while (0)
 # define finish_arch_switch(rq, next)  spin_unlock_irq(&(rq)->lock)
-# define task_running(rq, p)           ((rq)->curr == (p))
+# define task_running(rq, p)           ((p)->on_cpu)
 #endif
 
 /*
@@ -867,7 +867,7 @@ void wait_task_inactive(task_t * p)
 repeat:
        rq = task_rq_lock(p, &flags);
        /* Must be off runqueue entirely, not preempted. */
-       if (unlikely(p->array)) {
+       if (unlikely(p->array || p->on_cpu)) {
                /* If it's preempted, we yield.  It could be a while. */
                preempted = !task_running(rq, p);
                task_rq_unlock(rq, &flags);
@@ -2805,11 +2805,18 @@ switch_tasks:
                rq->curr = next;
                ++*switch_count;
 
+               next->on_cpu = 1;
                prepare_arch_switch(rq, next);
                prev = context_switch(rq, prev, next);
                barrier();
 
                finish_task_switch(prev);
+               /*
+                * Some architectures release the runqueue lock before
+                * context switching. Make sure this isn't reordered.
+                */
+               smp_wmb();
+               prev->on_cpu = 0;
        } else
                spin_unlock_irq(&rq->lock);
 
@@ -4055,6 +4062,7 @@ void __devinit init_idle(task_t *idle, i
        set_task_cpu(idle, cpu);
 
        spin_lock_irqsave(&rq->lock, flags);
+       idle->on_cpu = 1;
        rq->curr = rq->idle = idle;
        set_tsk_need_resched(idle);
        spin_unlock_irqrestore(&rq->lock, flags);
diff -puN include/linux/init_task.h~sched-fixup-locking 
include/linux/init_task.h
--- linux-2.6/include/linux/init_task.h~sched-fixup-locking     2005-02-05 
15:24:56.000000000 +1100
+++ linux-2.6-npiggin/include/linux/init_task.h 2005-02-05 15:25:07.000000000 
+1100
@@ -73,6 +73,7 @@ extern struct group_info init_groups;
        .usage          = ATOMIC_INIT(2),                               \
        .flags          = 0,                                            \
        .lock_depth     = -1,                                           \
+       .on_cpu         = 0,                                            \
        .prio           = MAX_PRIO-20,                                  \
        .static_prio    = MAX_PRIO-20,                                  \
        .policy         = SCHED_NORMAL,                                 \

_

Reply via email to