On Wed, Apr 23, 2014 at 09:00:36PM +0200, Denys Vlasenko wrote:
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 9cae286..08dd220 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -4255,6 +4255,9 @@ EXPORT_SYMBOL_GPL(yield_to);
>   */
>  void __sched io_schedule(void)
>  {
> +#ifdef CONFIG_NO_HZ_COMMON
> +     int cpu_on_entry = smp_processor_id();
> +#endif
>       struct rq *rq = raw_rq();
>  
>       delayacct_blkio_start();
> @@ -4263,13 +4266,23 @@ void __sched io_schedule(void)
>       current->in_iowait = 1;
>       schedule();
>       current->in_iowait = 0;
> +#ifdef CONFIG_NO_HZ_COMMON
> +     if (atomic_dec_and_test(&rq->nr_iowait)) {
> +             if (smp_processor_id() != cpu_on_entry)
> +                     tick_nohz_iowait_to_idle(cpu_on_entry);
> +     }
> +#else
>       atomic_dec(&rq->nr_iowait);
> +#endif
>       delayacct_blkio_end();
>  }
>  EXPORT_SYMBOL(io_schedule);
>  
>  long __sched io_schedule_timeout(long timeout)
>  {
> +#ifdef CONFIG_NO_HZ_COMMON
> +     int cpu_on_entry = smp_processor_id();
> +#endif
>       struct rq *rq = raw_rq();
>       long ret;
>  
> @@ -4279,7 +4292,14 @@ long __sched io_schedule_timeout(long timeout)
>       current->in_iowait = 1;
>       ret = schedule_timeout(timeout);
>       current->in_iowait = 0;
> +#ifdef CONFIG_NO_HZ_COMMON
> +     if (atomic_dec_and_test(&rq->nr_iowait)) {
> +             if (smp_processor_id() != cpu_on_entry)
> +                     tick_nohz_iowait_to_idle(cpu_on_entry);
> +     }
> +#else
>       atomic_dec(&rq->nr_iowait);
> +#endif
>       delayacct_blkio_end();
>       return ret;
>  }

Why do you insist on writing the same (buggy, see later) code twice?
Really, get lazy already and write it once but use it twice!

It's buggy because smp_processor_id() is used in preemptible context;
it's further buggy because raw_rq() does it again and could get a rq
on a different cpu.

What you want is something like:

/*
 * io_wait_start - mark the current task as waiting on I/O against @rq.
 * @rq: runqueue whose nr_iowait counter is incremented.
 *
 * Atomically increments rq->nr_iowait and sets current->in_iowait to 1.
 * The caller passes the same @rq to io_wait_end() to undo both steps.
 */
static inline void io_wait_start(struct rq *rq)
{
        atomic_inc(&rq->nr_iowait);
        current->in_iowait = 1;
}

/*
 * io_wait_end - undo the iowait accounting done for @rq.
 * @rq: runqueue whose nr_iowait counter is decremented.
 *
 * Clears current->in_iowait and atomically decrements rq->nr_iowait.
 * With CONFIG_NO_HZ_COMMON, when the decrement brings the counter to zero
 * and cpu_of(@rq) differs from raw_smp_processor_id(),
 * tick_nohz_iowait_end() is called for @rq's CPU. Without
 * CONFIG_NO_HZ_COMMON the counter is simply decremented.
 *
 * NOTE(review): nothing here serializes a remote wakeup against the CPU
 * waking from nohz; a lock around that state is probably still needed.
 */
static inline void io_wait_end(struct rq *rq)
{
        current->in_iowait = 0;
#ifdef CONFIG_NO_HZ_COMMON
        /*
         * Only the last waiter leaving (counter hits zero) notifies the
         * tick code, and only when it finishes on a CPU other than @rq's.
         * NOTE(review): the raw_ variant is presumably used because this
         * may run preemptible -- confirm.
         */
        if (atomic_dec_and_test(&rq->nr_iowait) && 
                cpu_of(rq) != raw_smp_processor_id()) {
                tick_nohz_iowait_end(cpu_of(rq));
        }
#else
        atomic_dec(&rq->nr_iowait);
#endif
}

Anyway, I suspect that's still broken and you really need that lock
around the state like I did earlier, because the above isn't serialized
between remote wakeup and the cpu waking from nohz.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to