This patch adds the power-aware scheduler knob to sysfs:

$cat /sys/devices/system/cpu/sched_policy/available_sched_policy
performance powersaving

$cat /sys/devices/system/cpu/sched_policy/current_sched_policy
powersaving

The policy in use is 'powersaving'. The user can change the policy
with the 'echo' command:
 echo performance > /sys/devices/system/cpu/sched_policy/current_sched_policy

Power-aware scheduling behaves differently depending on the
selected policy:

performance: the current scheduling behaviour, try to spread tasks
                on more CPU sockets or cores.
powersaving: pack tasks into a sched group until the group's
                nr_running reaches group_weight.

Signed-off-by: Alex Shi <alex....@intel.com>
---
 Documentation/ABI/testing/sysfs-devices-system-cpu | 21 +++++++
 drivers/base/cpu.c                                 |  2 +
 include/linux/cpu.h                                |  2 +
 kernel/sched/fair.c                                | 68 +++++++++++++++++++++-
 kernel/sched/sched.h                               |  5 ++
 5 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 6943133..1909d3e 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -53,6 +53,27 @@ Description: Dynamic addition and removal of CPU's.  This is not hotplug
                the system.  Information writtento the file to remove CPU's
                is architecture specific.
 
+What:          /sys/devices/system/cpu/sched_policy/current_sched_policy
+               /sys/devices/system/cpu/sched_policy/available_sched_policy
+Date:          Oct 2012
+Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:   Interface for showing and setting the CFS scheduler policy.
+
+               available_sched_policy shows there are 2 kinds of policy now:
+               performance and powersaving.
+               current_sched_policy shows the current scheduler policy, and
+               the user can change the policy by writing to this file.
+
+               The policy decides how the CFS scheduler distributes tasks
+               across CPUs when the system has fewer tasks than logical CPUs.
+
+               performance: try to spread tasks onto more CPU sockets,
+               more CPU cores.
+
+               powersaving: try to pack tasks onto the same core or the same
+               CPU until the number of running tasks exceeds the number of
+               logical CPUs in that core or socket.
+
 What:          /sys/devices/system/cpu/cpu#/node
 Date:          October 2009
 Contact:       Linux memory management mailing list <linux...@kvack.org>
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 6345294..5f6a573 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -330,4 +330,6 @@ void __init cpu_dev_init(void)
                panic("Failed to register CPU subsystem");
 
        cpu_dev_register_generic();
+
+       create_sysfs_sched_policy_group(cpu_subsys.dev_root);
 }
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index ce7a074..b2e9265 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -36,6 +36,8 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
 extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
 extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
 
+extern int create_sysfs_sched_policy_group(struct device *dev);
+
 #ifdef CONFIG_HOTPLUG_CPU
 extern void unregister_cpu(struct cpu *cpu);
 extern ssize_t arch_cpu_probe(const char *, size_t);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2cebc81..dedc576 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6383,7 +6383,6 @@ void unregister_fair_sched_group(struct task_group *tg, int cpu) { }
 
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
-
 static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
 {
        struct sched_entity *se = &task->se;
@@ -6399,6 +6398,73 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
        return rr_interval;
 }
 
+/* The default scheduler policy is 'performance'. */
+int __read_mostly sched_policy = SCHED_POLICY_PERFORMANCE;
+
+#ifdef CONFIG_SYSFS
+static ssize_t show_available_sched_policy(struct device *dev,
+               struct device_attribute *attr,
+               char *buf)
+{
+       return sprintf(buf, "performance powersaving\n");
+}
+
+static ssize_t show_current_sched_policy(struct device *dev,
+               struct device_attribute *attr,
+               char *buf)
+{
+       if (sched_policy == SCHED_POLICY_PERFORMANCE)
+               return sprintf(buf, "performance\n");
+       else if (sched_policy == SCHED_POLICY_POWERSAVING)
+               return sprintf(buf, "powersaving\n");
+       return 0;
+}
+
+/* sysfs store: switch sched_policy to the policy named in @buf. */
+static ssize_t set_sched_policy(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
+{
+       char    str_policy[16];
+
+       /* Take the first whitespace-delimited word, at most 15 characters. */
+       if (sscanf(buf, "%15s", str_policy) != 1)
+               return -EINVAL;
+
+       if (!strcmp(str_policy, "performance"))
+               sched_policy = SCHED_POLICY_PERFORMANCE;
+       else if (!strcmp(str_policy, "powersaving"))
+               sched_policy = SCHED_POLICY_POWERSAVING;
+       else
+               return -EINVAL;
+
+       return count;
+}
+
+/*
+ * Sysfs setup bits:
+ */
+static DEVICE_ATTR(current_sched_policy, 0644, show_current_sched_policy,
+                                               set_sched_policy);
+
+static DEVICE_ATTR(available_sched_policy, 0444,
+               show_available_sched_policy, NULL);
+
+static struct attribute *sched_policy_default_attrs[] = {
+       &dev_attr_current_sched_policy.attr,
+       &dev_attr_available_sched_policy.attr,
+       NULL
+};
+static struct attribute_group sched_policy_attr_group = {
+       .attrs = sched_policy_default_attrs,
+       .name = "sched_policy",
+};
+
+int __init create_sysfs_sched_policy_group(struct device *dev)
+{
+       return sysfs_create_group(&dev->kobj, &sched_policy_attr_group);
+}
+#endif /* CONFIG_SYSFS */
+
 /*
  * All the scheduling class methods:
  */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 508e77e..9a6e06c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -9,6 +9,11 @@
 
 extern __read_mostly int scheduler_running;
 
+#define SCHED_POLICY_PERFORMANCE       (0x1)
+#define SCHED_POLICY_POWERSAVING       (0x2)
+
+extern int __read_mostly sched_policy;
+
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
-- 
1.7.12

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to