Re: [RFCv5, 18/46] arm: topology: Define TC2 energy and provide it to the scheduler
Hi Leo, On 08/17/2015 02:19 AM, Leo Yan wrote: [...] diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index b35d3e5..bbe20c7 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -274,6 +274,119 @@ void store_cpu_topology(unsigned int cpuid) cpu_topology[cpuid].socket_id, mpidr); } +/* + * ARM TC2 specific energy cost model data. There are no unit requirements for + * the data. Data can be normalized to any reference point, but the + * normalization must be consistent. That is, one bogo-joule/watt must be the + * same quantity for all data, but we don't care what it is. + */ +static struct idle_state idle_states_cluster_a7[] = { +{ .power = 25 }, /* WFI */ This state is confused. Is this state corresponding to all CPUs have been powered off but L2 cache RAM array and SCU are still power on? This is what we refer to as 'active idle'. All cpus of the cluster are in WFI but the cluster is not in cluster-sleep yet. We measure the corresponding energy value by disabling the 'cluster-sleep-[b,l]' state and let the cpus do nothing for a specific time period. +{ .power = 10 }, /* cluster-sleep-l */ Is this status means all CPU and cluster have been powered off, if so then it will have no power consumption anymore... The cluster is in cluster-sleep but there is still some peripheral related to the cluster active which explains this power value we calculated from the pre/post energy value diff (by reading the vexpress energy counter for this cluster) and the time period we were idling on this cluster. + }; + +static struct idle_state idle_states_cluster_a15[] = { +{ .power = 70 }, /* WFI */ +{ .power = 25 }, /* cluster-sleep-b */ + }; + +static struct capacity_state cap_states_cluster_a7[] = { + /* Cluster only power */ +{ .cap = 150, .power = 2967, }, /* 350 MHz */ For cluster level's capacity, does it mean need run benchmark on all CPUs within cluster? 
We run an 'always running thread per cpu' workload on {n, n-1, ..., 1} cpus of a cluster (hotplug-out the other cpus) for a specific time period. Then we calculate the cluster power value by extrapolating from the power values for the {n, n-1, ..., 1} test runs and use the delta between an n and an n+1 test run value as the core power value. [...] +static struct idle_state idle_states_core_a7[] = { +{ .power = 0 }, /* WFI */ Should have two idle states for CPU level (WFI and CPU's power off)? The ARM TC2 platform has only 2 idle states, there is no 'cpu power off': # cat /sys/devices/system/cpu/cpu[0,2]/cpuidle/state*/name WFI cluster-sleep-b WFI cluster-sleep-l [...] -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFCv5, 18/46] arm: topology: Define TC2 energy and provide it to the scheduler
Hi Leo, On 08/17/2015 02:19 AM, Leo Yan wrote: [...] diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index b35d3e5..bbe20c7 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -274,6 +274,119 @@ void store_cpu_topology(unsigned int cpuid) cpu_topology[cpuid].socket_id, mpidr); } +/* + * ARM TC2 specific energy cost model data. There are no unit requirements for + * the data. Data can be normalized to any reference point, but the + * normalization must be consistent. That is, one bogo-joule/watt must be the + * same quantity for all data, but we don't care what it is. + */ +static struct idle_state idle_states_cluster_a7[] = { +{ .power = 25 }, /* WFI */ This state is confused. Is this state corresponding to all CPUs have been powered off but L2 cache RAM array and SCU are still power on? This is what we refer to as 'active idle'. All cpus of the cluster are in WFI but the cluster is not in cluster-sleep yet. We measure the corresponding energy value by disabling the 'cluster-sleep-[b,l]' state and let the cpus do nothing for a specific time period. +{ .power = 10 }, /* cluster-sleep-l */ Is this status means all CPU and cluster have been powered off, if so then it will have no power consumption anymore... The cluster is in cluster-sleep but there is still some peripheral related to the cluster active which explains this power value we calculated from the pre/post energy value diff (by reading the vexpress energy counter for this cluster) and the time period we were idling on this cluster. + }; + +static struct idle_state idle_states_cluster_a15[] = { +{ .power = 70 }, /* WFI */ +{ .power = 25 }, /* cluster-sleep-b */ + }; + +static struct capacity_state cap_states_cluster_a7[] = { + /* Cluster only power */ +{ .cap = 150, .power = 2967, }, /* 350 MHz */ For cluster level's capacity, does it mean need run benchmark on all CPUs within cluster? 
We run an 'always running thread per cpu' workload on {n, n-1, ..., 1} cpus of a cluster (hotplug-out the other cpus) for a specific time period. Then we calculate the cluster power value by extrapolating from the power values for the {n, n-1, ..., 1} test runs and use the delta between an n and an n+1 test run value as the core power value. [...] +static struct idle_state idle_states_core_a7[] = { +{ .power = 0 }, /* WFI */ Should have two idle states for CPU level (WFI and CPU's power off)? The ARM TC2 platform has only 2 idle states, there is no 'cpu power off': # cat /sys/devices/system/cpu/cpu[0,2]/cpuidle/state*/name WFI cluster-sleep-b WFI cluster-sleep-l [...] -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFCv5, 18/46] arm: topology: Define TC2 energy and provide it to the scheduler
Hi Morten, On Tue, Jul 07, 2015 at 07:24:01PM +0100, Morten Rasmussen wrote: > From: Dietmar Eggemann > > This patch is only here to be able to test provisioning of energy related > data from an arch topology shim layer to the scheduler. Since there is no > code today which deals with extracting energy related data from the dtb or > acpi, and process it in the topology shim layer, the content of the > sched_group_energy structures as well as the idle_state and capacity_state > arrays are hard-coded here. > > This patch defines the sched_group_energy structure as well as the > idle_state and capacity_state array for the cluster (relates to sched > groups (sgs) in DIE sched domain level) and for the core (relates to sgs > in MC sd level) for a Cortex A7 as well as for a Cortex A15. > It further provides related implementations of the sched_domain_energy_f > functions (cpu_cluster_energy() and cpu_core_energy()). > > To be able to propagate this information from the topology shim layer to > the scheduler, the elements of the arm_topology[] table have been > provisioned with the appropriate sched_domain_energy_f functions. > > cc: Russell King > > Signed-off-by: Dietmar Eggemann > > --- > arch/arm/kernel/topology.c | 118 +++-- > 1 file changed, 115 insertions(+), 3 deletions(-) > > diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c > index b35d3e5..bbe20c7 100644 > --- a/arch/arm/kernel/topology.c > +++ b/arch/arm/kernel/topology.c > @@ -274,6 +274,119 @@ void store_cpu_topology(unsigned int cpuid) > cpu_topology[cpuid].socket_id, mpidr); > } > > +/* > + * ARM TC2 specific energy cost model data. There are no unit requirements > for > + * the data. Data can be normalized to any reference point, but the > + * normalization must be consistent. That is, one bogo-joule/watt must be the > + * same quantity for all data, but we don't care what it is. 
> + */ > +static struct idle_state idle_states_cluster_a7[] = { > + { .power = 25 }, /* WFI */ This state is confused. Is this state corresponding to all CPUs have been powered off but L2 cache RAM array and SCU are still power on? > + { .power = 10 }, /* cluster-sleep-l */ Is this status means all CPU and cluster have been powered off, if so then it will have no power consumption anymore... > + }; > + > +static struct idle_state idle_states_cluster_a15[] = { > + { .power = 70 }, /* WFI */ > + { .power = 25 }, /* cluster-sleep-b */ > + }; > + > +static struct capacity_state cap_states_cluster_a7[] = { > + /* Cluster only power */ > + { .cap = 150, .power = 2967, }, /* 350 MHz */ For cluster level's capacity, does it mean need run benchmark on all CPUs within cluster? > + { .cap = 172, .power = 2792, }, /* 400 MHz */ > + { .cap = 215, .power = 2810, }, /* 500 MHz */ > + { .cap = 258, .power = 2815, }, /* 600 MHz */ > + { .cap = 301, .power = 2919, }, /* 700 MHz */ > + { .cap = 344, .power = 2847, }, /* 800 MHz */ > + { .cap = 387, .power = 3917, }, /* 900 MHz */ > + { .cap = 430, .power = 4905, }, /* 1000 MHz */ > + }; > + > +static struct capacity_state cap_states_cluster_a15[] = { > + /* Cluster only power */ > + { .cap = 426, .power = 7920, }, /* 500 MHz */ > + { .cap = 512, .power = 8165, }, /* 600 MHz */ > + { .cap = 597, .power = 8172, }, /* 700 MHz */ > + { .cap = 682, .power = 8195, }, /* 800 MHz */ > + { .cap = 768, .power = 8265, }, /* 900 MHz */ > + { .cap = 853, .power = 8446, }, /* 1000 MHz */ > + { .cap = 938, .power = 11426, }, /* 1100 MHz */ > + { .cap = 1024, .power = 15200, }, /* 1200 MHz */ > + }; > + > +static struct sched_group_energy energy_cluster_a7 = { > + .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a7), > + .idle_states= idle_states_cluster_a7, > + .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a7), > + .cap_states = cap_states_cluster_a7, > +}; > + > +static struct sched_group_energy energy_cluster_a15 = { > + .nr_idle_states = 
ARRAY_SIZE(idle_states_cluster_a15), > + .idle_states= idle_states_cluster_a15, > + .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a15), > + .cap_states = cap_states_cluster_a15, > +}; > + > +static struct idle_state idle_states_core_a7[] = { > + { .power = 0 }, /* WFI */ Should have two idle states for CPU level (WFI and CPU's power off)? > + }; > + > +static struct idle_state idle_states_core_a15[] = { > + { .power = 0 }, /* WFI */ > + }; > + > +static struct capacity_state cap_states_core_a7[] = { > + /* Power per cpu */ > + { .cap = 150, .power = 187, }, /* 350 MHz */ > + { .cap = 172, .power = 275, }, /* 400 MHz */ > + { .cap = 215, .power = 334, }, /* 500 MHz */ > + { .cap = 258, .power = 407, }, /* 600 MHz */ > + { .cap = 301, .power =
Re: [RFCv5, 18/46] arm: topology: Define TC2 energy and provide it to the scheduler
Hi Morten, On Tue, Jul 07, 2015 at 07:24:01PM +0100, Morten Rasmussen wrote: From: Dietmar Eggemann dietmar.eggem...@arm.com This patch is only here to be able to test provisioning of energy related data from an arch topology shim layer to the scheduler. Since there is no code today which deals with extracting energy related data from the dtb or acpi, and process it in the topology shim layer, the content of the sched_group_energy structures as well as the idle_state and capacity_state arrays are hard-coded here. This patch defines the sched_group_energy structure as well as the idle_state and capacity_state array for the cluster (relates to sched groups (sgs) in DIE sched domain level) and for the core (relates to sgs in MC sd level) for a Cortex A7 as well as for a Cortex A15. It further provides related implementations of the sched_domain_energy_f functions (cpu_cluster_energy() and cpu_core_energy()). To be able to propagate this information from the topology shim layer to the scheduler, the elements of the arm_topology[] table have been provisioned with the appropriate sched_domain_energy_f functions. cc: Russell King li...@arm.linux.org.uk Signed-off-by: Dietmar Eggemann dietmar.eggem...@arm.com --- arch/arm/kernel/topology.c | 118 +++-- 1 file changed, 115 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index b35d3e5..bbe20c7 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -274,6 +274,119 @@ void store_cpu_topology(unsigned int cpuid) cpu_topology[cpuid].socket_id, mpidr); } +/* + * ARM TC2 specific energy cost model data. There are no unit requirements for + * the data. Data can be normalized to any reference point, but the + * normalization must be consistent. That is, one bogo-joule/watt must be the + * same quantity for all data, but we don't care what it is. + */ +static struct idle_state idle_states_cluster_a7[] = { + { .power = 25 }, /* WFI */ This state is confused. 
Is this state corresponding to all CPUs have been powered off but L2 cache RAM array and SCU are still power on? + { .power = 10 }, /* cluster-sleep-l */ Is this status means all CPU and cluster have been powered off, if so then it will have no power consumption anymore... + }; + +static struct idle_state idle_states_cluster_a15[] = { + { .power = 70 }, /* WFI */ + { .power = 25 }, /* cluster-sleep-b */ + }; + +static struct capacity_state cap_states_cluster_a7[] = { + /* Cluster only power */ + { .cap = 150, .power = 2967, }, /* 350 MHz */ For cluster level's capacity, does it mean need run benchmark on all CPUs within cluster? + { .cap = 172, .power = 2792, }, /* 400 MHz */ + { .cap = 215, .power = 2810, }, /* 500 MHz */ + { .cap = 258, .power = 2815, }, /* 600 MHz */ + { .cap = 301, .power = 2919, }, /* 700 MHz */ + { .cap = 344, .power = 2847, }, /* 800 MHz */ + { .cap = 387, .power = 3917, }, /* 900 MHz */ + { .cap = 430, .power = 4905, }, /* 1000 MHz */ + }; + +static struct capacity_state cap_states_cluster_a15[] = { + /* Cluster only power */ + { .cap = 426, .power = 7920, }, /* 500 MHz */ + { .cap = 512, .power = 8165, }, /* 600 MHz */ + { .cap = 597, .power = 8172, }, /* 700 MHz */ + { .cap = 682, .power = 8195, }, /* 800 MHz */ + { .cap = 768, .power = 8265, }, /* 900 MHz */ + { .cap = 853, .power = 8446, }, /* 1000 MHz */ + { .cap = 938, .power = 11426, }, /* 1100 MHz */ + { .cap = 1024, .power = 15200, }, /* 1200 MHz */ + }; + +static struct sched_group_energy energy_cluster_a7 = { + .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a7), + .idle_states= idle_states_cluster_a7, + .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a7), + .cap_states = cap_states_cluster_a7, +}; + +static struct sched_group_energy energy_cluster_a15 = { + .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a15), + .idle_states= idle_states_cluster_a15, + .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a15), + .cap_states = cap_states_cluster_a15, +}; + +static struct idle_state 
idle_states_core_a7[] = { + { .power = 0 }, /* WFI */ Should have two idle states for CPU level (WFI and CPU's power off)? + }; + +static struct idle_state idle_states_core_a15[] = { + { .power = 0 }, /* WFI */ + }; + +static struct capacity_state cap_states_core_a7[] = { + /* Power per cpu */ + { .cap = 150, .power = 187, }, /* 350 MHz */ + { .cap = 172, .power = 275, }, /* 400 MHz */ + { .cap = 215, .power = 334, }, /* 500 MHz */ + { .cap = 258, .power = 407, }, /* 600 MHz */ + { .cap = 301, .power = 447, }, /* 700 MHz */ + {