During the creation of sched_domain, we define a pack buddy CPU for each CPU
when one is available. We want to pack at all levels where a group of CPUs can
be power gated independently from others.
On a system that can't power gate a group of CPUs independently, the
SD_SHARE_POWERDOMAIN flag is set at all sched_domain levels and the buddy is
set to -1. This is the default behavior for all architectures.

On a dual-cluster / dual-core system that can power gate each core and each
cluster independently, the buddy configuration will be:

      | Cluster 0   | Cluster 1   |
      | CPU0 | CPU1 | CPU2 | CPU3 |
-----------------------------------
buddy | CPU0 | CPU0 | CPU0 | CPU2 |

If the cores in a cluster can't be power gated independently, the buddy
configuration becomes:

      | Cluster 0   | Cluster 1   |
      | CPU0 | CPU1 | CPU2 | CPU3 |
-----------------------------------
buddy | CPU0 | CPU1 | CPU0 | CPU0 |

Signed-off-by: Vincent Guittot <vincent.guit...@linaro.org>
---
 kernel/sched/core.c  |    1 +
 kernel/sched/fair.c  |   70 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h |    5 ++++
 3 files changed, 76 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 735e964..0bf5f4d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5184,6 +5184,7 @@ cpu_attach_domain(struct sched_domain *sd, struct 
root_domain *rd, int cpu)
        rcu_assign_pointer(rq->sd, sd);
        destroy_sched_domains(tmp, cpu);
 
+       update_packing_domain(cpu);
        update_top_cache_domain(cpu);
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 11cd136..5547831 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -178,6 +178,76 @@ void sched_init_granularity(void)
        update_sysctl();
 }
 
+#ifdef CONFIG_SMP
+#ifdef CONFIG_SCHED_PACKING_TASKS
+/*
+ * Save the id of the optimal CPU that should be used to pack small tasks.
+ * It is -1 when no usable sched_domain was found; a CPU can be its own buddy.
+ */
+DEFINE_PER_CPU(int, sd_pack_buddy);
+
+/*
+ * Look for the best buddy CPU on which the small tasks of @cpu can be packed.
+ * We assume that it isn't worth packing on CPUs that share the same power
+ * line, so we start at the 1st sched_domain without the SD_SHARE_POWERDOMAIN
+ * flag. There we pick the sched_group with the lowest power per core, on the
+ * assumption that its power efficiency is better; ties are broken in favour
+ * of the lower CPU id. The result is saved in the per-CPU sd_pack_buddy.
+ */
+void update_packing_domain(int cpu)
+{
+       struct sched_domain *sd;
+       int id = -1; /* kept when no sched_domain can be used */
+
+       sd = highest_flag_domain(cpu, SD_SHARE_POWERDOMAIN);
+       if (!sd)
+               sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);
+       else
+               sd = sd->parent;
+
+       while (sd && (sd->flags & SD_LOAD_BALANCE)
+               && !(sd->flags & SD_SHARE_POWERDOMAIN)) {
+               struct sched_group *sg = sd->groups;
+               struct sched_group *pack = sg;
+               struct sched_group *tmp;
+
+               /*
+                * The sched_domain of a CPU points to its local sched_group,
+                * so this CPU is the default candidate at every level
+                */
+               id = cpu;
+
+               /* walk the other sched_groups, comparing power/group_weight */
+               for (tmp = sg->next; tmp != sg; tmp = tmp->next) {
+                       if (tmp->sgp->power * pack->group_weight >
+                                       pack->sgp->power * tmp->group_weight)
+                               continue; /* tmp has a higher power per core */
+
+                       if ((tmp->sgp->power * pack->group_weight ==
+                                       pack->sgp->power * tmp->group_weight)
+                        && (cpumask_first(sched_group_cpus(tmp)) >= id))
+                               continue; /* same ratio, CPU id not lower */
+
+                       /* we have found a better group */
+                       pack = tmp;
+
+                       /* Take the 1st CPU of the new group */
+                       id = cpumask_first(sched_group_cpus(pack));
+               }
+
+               /* Stop as soon as the buddy is a CPU other than itself */
+               if (id != cpu)
+                       break;
+
+               sd = sd->parent; /* otherwise retry one level up */
+       }
+
+       pr_debug("CPU%d packing on CPU%d\n", cpu, id);
+       per_cpu(sd_pack_buddy, cpu) = id;
+}
+#endif /* CONFIG_SCHED_PACKING_TASKS */
+#endif /* CONFIG_SMP */
+
 #if BITS_PER_LONG == 32
 # define WMULT_CONST   (~0UL)
 #else
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b3c5653..22e3f1d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1022,6 +1022,11 @@ extern void update_group_power(struct sched_domain *sd, 
int cpu);
 
 extern void trigger_load_balance(struct rq *rq, int cpu);
 extern void idle_balance(int this_cpu, struct rq *this_rq);
+#ifdef CONFIG_SCHED_PACKING_TASKS
+extern void update_packing_domain(int cpu);
+#else /* packing disabled: callers get an empty stub */
+static inline void update_packing_domain(int cpu) {}
+#endif /* CONFIG_SCHED_PACKING_TASKS */
 
 extern void idle_enter_fair(struct rq *this_rq);
 extern void idle_exit_fair(struct rq *this_rq);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to