Re: [PATCH v2 -tip] sched/rt: Fix locality of threaded interrupt handlers
On Wed, 2013-02-20 at 10:19 +0100, Alexander Gordeev wrote:
> When an interrupt affinity mask targets multiple CPUs, the
> RT scheduler selects a runqueue for the RT task corresponding
> to a threaded interrupt handler without considering where the
> interrupt actually gets delivered. This leads to a suboptimal
> situation in which the hardware interrupt handler executes on
> one CPU while the threaded interrupt handler executes on
> another CPU.
>
> This fix alters the behaviour of threaded handler wake-ups by
> giving priority to the CPU where the hardware interrupt handler
> is executing. As a result, both halves of interrupt handling
> are kept local most of the time.

This fails to explain the ramifications for the 'global' FIFO push-pull
stuff. Left patch in place for Steven.

> Signed-off-by: Alexander Gordeev
> ---
>  include/linux/sched.h |    2 ++
>  kernel/irq/handle.c   |    2 +-
>  kernel/sched/core.c   |    5 +
>  kernel/sched/rt.c     |   27 +++
>  4 files changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 206bb08..1d59efa 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1061,6 +1061,7 @@ struct sched_domain;
>  #define WF_SYNC		0x01		/* waker goes to sleep after wakup */
>  #define WF_FORK		0x02		/* child wakeup after fork */
>  #define WF_MIGRATED	0x04		/* internal use, task got migrated */
> +#define WF_LOCAL	0x08		/* try to wake up locally */
>  
>  #define ENQUEUE_WAKEUP		1
>  #define ENQUEUE_HEAD		2
> @@ -2207,6 +2208,7 @@ extern void xtime_update(unsigned long ticks);
>  
>  extern int wake_up_state(struct task_struct *tsk, unsigned int state);
>  extern int wake_up_process(struct task_struct *tsk);
> +extern int wake_up_local(struct task_struct *tsk);
>  extern void wake_up_new_task(struct task_struct *tsk);
>  #ifdef CONFIG_SMP
>  extern void kick_process(struct task_struct *tsk);
> diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
> index 131ca17..fe97d0c 100644
> --- a/kernel/irq/handle.c
> +++ b/kernel/irq/handle.c
> @@ -126,7 +126,7 @@ static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
>  	 */
>  	atomic_inc(&desc->threads_active);
>  
> -	wake_up_process(action->thread);
> +	wake_up_local(action->thread);
>  }
>  
>  irqreturn_t
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 257002c..38413f6 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1527,6 +1527,11 @@ int wake_up_process(struct task_struct *p)
>  }
>  EXPORT_SYMBOL(wake_up_process);
>  
> +int wake_up_local(struct task_struct *p)
> +{
> +	return try_to_wake_up(p, TASK_ALL, WF_LOCAL);
> +}
> +
>  int wake_up_state(struct task_struct *p, unsigned int state)
>  {
>  	return try_to_wake_up(p, state, 0);
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 418feb0..de16e16 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1234,7 +1234,7 @@ static void yield_task_rt(struct rq *rq)
>  }
>  
>  #ifdef CONFIG_SMP
> -static int find_lowest_rq(struct task_struct *task);
> +static int find_lowest_rq(struct task_struct *task, bool prefer_this_cpu);
>  
>  static int
>  select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
> @@ -1242,6 +1242,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
>  	struct task_struct *curr;
>  	struct rq *rq;
>  	int cpu;
> +	bool prefer_this_cpu = flags & WF_LOCAL;
>  
>  	cpu = task_cpu(p);
>  
> @@ -1258,6 +1259,11 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
>  	curr = ACCESS_ONCE(rq->curr); /* unlocked access */
>  
>  	/*
> +	 * If this RT task is a threaded interrupt handler, then
> +	 * it is being awaken from the hardware interrupt handler.
> +	 * In this case try to keep hardware and threaded interrupt
> +	 * handlers as close as possible and wake it up on this CPU.
> +	 *
>  	 * If the current task on @p's runqueue is an RT task, then
>  	 * try to see if we can wake this RT task up on another
>  	 * runqueue. Otherwise simply start this RT task
> @@ -1279,11 +1285,12 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
>  	 * This test is optimistic, if we get it wrong the load-balancer
>  	 * will have to sort it out.
>  	 */
> -	if (curr && unlikely(rt_task(curr)) &&
> -	    (curr->nr_cpus_allowed < 2 ||
> -	     curr->prio <= p->prio) &&
> -	    (p->nr_cpus_allowed > 1)) {
> -		int target = find_lowest_rq(p);
> +	if (prefer_this_cpu ||
> +	    (curr && unlikely(rt_task(curr)) &&
> +	     (curr->nr_cpus_allowed < 2 ||
> +	      curr->prio <= p->prio) &&
> +	     (p->nr_cpus_allowed > 1))) {
> +		int target = find_lowest_rq(p, prefer_this_cpu);
>  
>  		if (targ
[PATCH v2 -tip] sched/rt: Fix locality of threaded interrupt handlers
When an interrupt affinity mask targets multiple CPUs, the
RT scheduler selects a runqueue for the RT task corresponding
to a threaded interrupt handler without considering where the
interrupt actually gets delivered. This leads to a suboptimal
situation in which the hardware interrupt handler executes on
one CPU while the threaded interrupt handler executes on
another CPU.

This fix alters the behaviour of threaded handler wake-ups by
giving priority to the CPU where the hardware interrupt handler
is executing. As a result, both halves of interrupt handling
are kept local most of the time.

Signed-off-by: Alexander Gordeev
---
 include/linux/sched.h |    2 ++
 kernel/irq/handle.c   |    2 +-
 kernel/sched/core.c   |    5 +
 kernel/sched/rt.c     |   27 +++
 4 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 206bb08..1d59efa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1061,6 +1061,7 @@ struct sched_domain;
 #define WF_SYNC		0x01		/* waker goes to sleep after wakup */
 #define WF_FORK		0x02		/* child wakeup after fork */
 #define WF_MIGRATED	0x04		/* internal use, task got migrated */
+#define WF_LOCAL	0x08		/* try to wake up locally */
 
 #define ENQUEUE_WAKEUP		1
 #define ENQUEUE_HEAD		2
@@ -2207,6 +2208,7 @@ extern void xtime_update(unsigned long ticks);
 
 extern int wake_up_state(struct task_struct *tsk, unsigned int state);
 extern int wake_up_process(struct task_struct *tsk);
+extern int wake_up_local(struct task_struct *tsk);
 extern void wake_up_new_task(struct task_struct *tsk);
 #ifdef CONFIG_SMP
 extern void kick_process(struct task_struct *tsk);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 131ca17..fe97d0c 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -126,7 +126,7 @@ static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
 	 */
 	atomic_inc(&desc->threads_active);
 
-	wake_up_process(action->thread);
+	wake_up_local(action->thread);
 }
 
 irqreturn_t
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 257002c..38413f6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1527,6 +1527,11 @@ int wake_up_process(struct task_struct *p)
 }
 EXPORT_SYMBOL(wake_up_process);
 
+int wake_up_local(struct task_struct *p)
+{
+	return try_to_wake_up(p, TASK_ALL, WF_LOCAL);
+}
+
 int wake_up_state(struct task_struct *p, unsigned int state)
 {
 	return try_to_wake_up(p, state, 0);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 418feb0..de16e16 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1234,7 +1234,7 @@ static void yield_task_rt(struct rq *rq)
 }
 
 #ifdef CONFIG_SMP
-static int find_lowest_rq(struct task_struct *task);
+static int find_lowest_rq(struct task_struct *task, bool prefer_this_cpu);
 
 static int
 select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
@@ -1242,6 +1242,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 	struct task_struct *curr;
 	struct rq *rq;
 	int cpu;
+	bool prefer_this_cpu = flags & WF_LOCAL;
 
 	cpu = task_cpu(p);
 
@@ -1258,6 +1259,11 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 	curr = ACCESS_ONCE(rq->curr); /* unlocked access */
 
 	/*
+	 * If this RT task is a threaded interrupt handler, then
+	 * it is being awaken from the hardware interrupt handler.
+	 * In this case try to keep hardware and threaded interrupt
+	 * handlers as close as possible and wake it up on this CPU.
+	 *
 	 * If the current task on @p's runqueue is an RT task, then
 	 * try to see if we can wake this RT task up on another
 	 * runqueue. Otherwise simply start this RT task
@@ -1279,11 +1285,12 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 	 * This test is optimistic, if we get it wrong the load-balancer
 	 * will have to sort it out.
 	 */
-	if (curr && unlikely(rt_task(curr)) &&
-	    (curr->nr_cpus_allowed < 2 ||
-	     curr->prio <= p->prio) &&
-	    (p->nr_cpus_allowed > 1)) {
-		int target = find_lowest_rq(p);
+	if (prefer_this_cpu ||
+	    (curr && unlikely(rt_task(curr)) &&
+	     (curr->nr_cpus_allowed < 2 ||
+	      curr->prio <= p->prio) &&
+	     (p->nr_cpus_allowed > 1))) {
+		int target = find_lowest_rq(p, prefer_this_cpu);
 
 		if (target != -1)
 			cpu = target;
@@ -1473,7 +1480,7 @@ next_idx:
 
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
-static int find_lowest_rq(struct task_struct *task)
+static int find_lowest_rq(struct task_struct *task, bool prefer_this_cpu)
 {
 	struct sched_domain *sd;
 	struct cpumask *lowest_mask = __ge
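
The patch text above breaks off inside find_lowest_rq(), so how the new
prefer_this_cpu argument is honoured there is not visible in this archive.
The stand-alone C program below is only a sketch of the selection policy the
changelog describes -- take the waking CPU when the task's affinity allows
it, otherwise fall back to the usual lowest-priority search. Every name in
it (find_lowest_rq_toy, toy_task, cpu_prio, NR_CPUS and the priority values)
is invented for illustration; none of it is the kernel code from the patch.

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4				/* invented toy value */

struct toy_task {
	unsigned int cpus_allowed;		/* affinity bitmask */
};

/* Illustrative per-CPU "priority of current RT work"; lower = cheaper to use. */
static const int cpu_prio[NR_CPUS] = { 50, 0, 20, 99 };

/*
 * Toy model of the policy in the changelog: when the wakeup comes from the
 * hardware interrupt handler (prefer_this_cpu), pick the waking CPU as long
 * as the task may run there; otherwise pick the allowed CPU whose current
 * work is cheapest to preempt, as the normal path would.
 */
static int find_lowest_rq_toy(const struct toy_task *task, int this_cpu,
			      bool prefer_this_cpu)
{
	int cpu, best = -1;

	if (prefer_this_cpu && (task->cpus_allowed & (1u << this_cpu)))
		return this_cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!(task->cpus_allowed & (1u << cpu)))
			continue;
		if (best == -1 || cpu_prio[cpu] < cpu_prio[best])
			best = cpu;
	}
	return best;
}

int main(void)
{
	struct toy_task irq_thread = { .cpus_allowed = 0xf };

	/* Hard IRQ ran on CPU 2: local wakeup keeps both halves together. */
	printf("WF_LOCAL wakeup  -> CPU %d\n", find_lowest_rq_toy(&irq_thread, 2, true));
	/* Ordinary wakeup falls back to the cheapest allowed CPU (CPU 1 here). */
	printf("ordinary wakeup  -> CPU %d\n", find_lowest_rq_toy(&irq_thread, 2, false));
	return 0;
}

The prefer_this_cpu short-circuit visible in select_task_rq_rt() above makes
find_lowest_rq() run even when the usual priority checks would not fire; how
find_lowest_rq() itself weighs the local CPU is the part cut off above.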
Re: [PATCH v2 -tip] sched/rt: Fix locality of threaded interrupt handlers
On Mon, Jan 14, 2013 at 03:11:52PM +0100, Alexander Gordeev wrote:
> When an interrupt affinity mask targets multiple CPUs, the
> RT scheduler selects a runqueue for the RT task corresponding
> to a threaded interrupt handler without considering where the
> interrupt actually gets delivered. This leads to a suboptimal
> situation in which the hardware interrupt handler executes on
> one CPU while the threaded interrupt handler executes on
> another CPU.
>
> This fix alters the behaviour of threaded handler wake-ups by
> giving priority to the CPU where the hardware interrupt handler
> is executing. As a result, both halves of interrupt handling
> are kept local most of the time.

Hi Thomas, Ingo,

Would you ack/consider this patch?

Thanks!

-- 
Regards,
Alexander Gordeev
agord...@redhat.com