On Mon, Oct 21, 2013 at 05:14:42PM +0530, Vaidyanathan Srinivasan wrote:
>  kernel/sched/fair.c |   19 +++++++++++++------
>  1 file changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 7c70201..12f0eab 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5807,12 +5807,19 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
>  
>       rcu_read_lock();
>       for_each_domain(cpu, sd) {
> +             struct sched_domain *sd_parent = sd->parent;
> +             struct sched_group *sg;
> +             struct sched_group_power *sgp;
> +             int nr_busy;
> +
> +             if (sd_parent) {
> +                     sg = sd_parent->groups;
> +                     sgp = sg->sgp;
> +                     nr_busy = atomic_read(&sgp->nr_busy_cpus);
> +
> +                     if (sd->flags & SD_SHARE_PKG_RESOURCES && nr_busy > 1)
> +                             goto need_kick_unlock;
> +             }
>  
>               if (sd->flags & SD_ASYM_PACKING && nr_busy != sg->group_weight
>                   && (cpumask_first_and(nohz.idle_cpus_mask,
> 

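Almost, I'd say; what happens on !sd_parent && SD_ASYM_PACKING? In that
case sg and nr_busy are never assigned, yet the SD_ASYM_PACKING test
below still reads them.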

Also, this made me look at the nr_busy stuff again, and somehow that
entire thing makes me a little sad.

Can't we do something like the below and cut that nr_busy sd iteration
short?

This nohz stuff really needs to be rethought and made more scalable --
it's a royal pain :/


 kernel/sched/core.c  |  4 ++++
 kernel/sched/fair.c  | 21 +++++++++++++++------
 kernel/sched/sched.h |  5 ++---
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c06b8d3..89db8dc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5271,6 +5271,7 @@ DEFINE_PER_CPU(struct sched_domain *, sd_llc);
 DEFINE_PER_CPU(int, sd_llc_size);
 DEFINE_PER_CPU(int, sd_llc_id);
 DEFINE_PER_CPU(struct sched_domain *, sd_numa);
+DEFINE_PER_CPU(struct sched_domain *, sd_busy);
 
 static void update_top_cache_domain(int cpu)
 {
@@ -5290,6 +5291,9 @@ static void update_top_cache_domain(int cpu)
 
        sd = lowest_flag_domain(cpu, SD_NUMA);
        rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
+
+       sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING);
+       rcu_assign_pointer(per_cpu(sd_busy, cpu), sd);
 }
 
 /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 813dd61..3d5141e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6512,19 +6512,23 @@ static inline void nohz_balance_exit_idle(int cpu)
        }
 }
 
-static inline void set_cpu_sd_state_busy(void)
+static inline void set_cpu_sd_state_busy(int cpu)
 {
        struct sched_domain *sd;
+       struct rq *rq = cpu_rq(cpu);
 
        rcu_read_lock();
-       sd = rcu_dereference_check_sched_domain(this_rq()->sd);
+       sd = rcu_dereference_check_sched_domain(rq->sd);
 
        if (!sd || !sd->nohz_idle)
                goto unlock;
        sd->nohz_idle = 0;
 
-       for (; sd; sd = sd->parent)
+       for (; sd; sd = sd->parent) {
                atomic_inc(&sd->groups->sgp->nr_busy_cpus);
+               if (sd == per_cpu(sd_busy, cpu))
+                       break;
+       }
 unlock:
        rcu_read_unlock();
 }
@@ -6532,16 +6536,21 @@ static inline void set_cpu_sd_state_busy(void)
 void set_cpu_sd_state_idle(void)
 {
        struct sched_domain *sd;
+       int cpu = smp_processor_id();
+       struct rq *rq = cpu_rq(cpu);
 
        rcu_read_lock();
-       sd = rcu_dereference_check_sched_domain(this_rq()->sd);
+       sd = rcu_dereference_check_sched_domain(rq->sd);
 
        if (!sd || sd->nohz_idle)
                goto unlock;
        sd->nohz_idle = 1;
 
-       for (; sd; sd = sd->parent)
+       for (; sd; sd = sd->parent) {
                atomic_dec(&sd->groups->sgp->nr_busy_cpus);
+               if (sd == per_cpu(sd_busy, cpu))
+                       break;
+       }
 unlock:
        rcu_read_unlock();
 }
@@ -6756,7 +6765,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
        * We may be recently in ticked or tickless idle mode. At the first
        * busy tick after returning from idle, we will update the busy stats.
        */
-       set_cpu_sd_state_busy();
+       set_cpu_sd_state_busy(cpu);
        nohz_balance_exit_idle(cpu);
 
        /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ffc7087..80c5fd2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -599,9 +599,8 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
        struct sched_domain *sd, *hsd = NULL;
 
        for_each_domain(cpu, sd) {
-               if (!(sd->flags & flag))
-                       break;
-               hsd = sd;
+               if (sd->flags & flag)
+                       hsd = sd;
        }
 
        return hsd;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to