On 23/01/18 15:34, Daniel Lezcano wrote:
+/**
+ * cpuidle_cooling_get_max_state - Get the maximum state
+ * @cdev  : the thermal cooling device
+ * @state : a pointer to the state variable to be filled
+ *
+ * The function gives always 100 as the injection ratio is percentile
+ * based for consistency accros different platforms.
+ *
+ * The function can not fail, it returns always zero.
+ */
+static int cpuidle_cooling_get_max_state(struct thermal_cooling_device *cdev,
+                                        unsigned long *state)
+{
+       /*
+        * Depending on the configuration or the hardware, the running
+        * cycle and the idle cycle could be different. We want unify
+        * that to an 0..100 interval, so the set state interface will
+        * be the same whatever the platform is.
+        *
+        * The state 100% will make the cluster 100% ... idle. A 0%
+        * injection ratio means no idle injection at all and 50%
+        * means for 10ms of idle injection, we have 10ms of running
+        * time.
+        */
+       *state = 100;

Doesn't this requires DTs to be changed?

Basically the existing bindings for cpu cooling dictate that
cooling-max-levels == num OPPs - 1 .

Likewise I think cooling-max-levels *defines* the scale used by the cooling maps in the DT. Introducing an alternative scale means that the OPP limits in the cooling-map will be misinterpreted when we bind the cooling device.

Note that we get away with this on Hikey because its mainline cooling map is, AFAICT, deeply sub-optimal and doesn't set any cooling limits. If its cooling map has been tuned (as the Exynos ones are) then I suspect there could be overheating problems when the cpuidle (or combo) CPU coolers are enabled.


Daniel.



+
+       return 0;
+}
+
+/**
+ * cpuidle_cooling_get_cur_state - Get the current cooling state
+ * @cdev: the thermal cooling device
+ * @state: a pointer to the state
+ *
+ * The function just copy the state value from the private thermal
+ * cooling device structure, the mapping is 1 <-> 1.
+ *
+ * The function can not fail, it returns always zero.
+ */
+static int cpuidle_cooling_get_cur_state(struct thermal_cooling_device *cdev,
+                                        unsigned long *state)
+{
+        struct cpuidle_cooling_device *idle_cdev = cdev->devdata;
+
+       *state = idle_cdev->state;
+
+       return 0;
+}
+
+/**
+ * cpuidle_cooling_set_cur_state - Set the current cooling state
+ * @cdev: the thermal cooling device
+ * @state: the target state
+ *
+ * The function checks first if we are initiating the mitigation which
+ * in turn wakes up all the idle injection tasks belonging to the idle
+ * cooling device. In any case, it updates the internal state for the
+ * cooling device.
+ *
+ * The function can not fail, it returns always zero.
+ */
+static int cpuidle_cooling_set_cur_state(struct thermal_cooling_device *cdev,
+                                        unsigned long state)
+{
+       struct cpuidle_cooling_device *idle_cdev = cdev->devdata;
+       unsigned long current_state = idle_cdev->state;
+
+       idle_cdev->state = state;
+
+       if (current_state == 0 && state > 0) {
+               pr_debug("Starting cooling cpus '%*pbl'\n",
+                        cpumask_pr_args(idle_cdev->cpumask));
+               cpuidle_cooling_wakeup(idle_cdev);
+       } else if (current_state > 0 && !state)  {
+               pr_debug("Stopping cooling cpus '%*pbl'\n",
+                        cpumask_pr_args(idle_cdev->cpumask));
+       }
+
+       return 0;
+}
+
+/**
+ * cpuidle_cooling_ops - thermal cooling device ops
+ */
+static struct thermal_cooling_device_ops cpuidle_cooling_ops = {
+       .get_max_state = cpuidle_cooling_get_max_state,
+       .get_cur_state = cpuidle_cooling_get_cur_state,
+       .set_cur_state = cpuidle_cooling_set_cur_state,
+};
+
+/**
+ * cpuidle_cooling_release - Kref based release helper
+ * @kref: a pointer to the kref structure
+ *
+ * This function is automatically called by the kref_put function when
+ * the idle cooling device refcount reaches zero. At this point, we
+ * have the guarantee the structure is no longer in use and we can
+ * safely release all the ressources.
+ */
+static void __init cpuidle_cooling_release(struct kref *kref)
+{
+       struct cpuidle_cooling_device *idle_cdev =
+               container_of(kref, struct cpuidle_cooling_device, kref);
+
+       thermal_cooling_device_unregister(idle_cdev->cdev);
+       kfree(idle_cdev->waitq);
+       kfree(idle_cdev->tsk);
+       kfree(idle_cdev);
+}
+
+/**
+ * cpuidle_cooling_register - Idle cooling device initialization function
+ *
+ * This function is in charge of creating a cooling device per cluster
+ * and register it to thermal framework. For this we rely on the
+ * topology as there is nothing yet describing better the idle state
+ * power domains.
+ *
+ * For each first CPU of the cluster's cpumask, we allocate the idle
+ * cooling device, initialize the general fields and then we initialze
+ * the rest in a per cpu basis.
+ *
+ * Returns zero on success, < 0 otherwise.
+ */
+int cpuidle_cooling_register(void)
+{
+       struct cpuidle_cooling_device *idle_cdev = NULL;
+       struct thermal_cooling_device *cdev;
+       struct task_struct *tsk;
+       struct device_node *np;
+       cpumask_t *cpumask;
+       char dev_name[THERMAL_NAME_LENGTH];
+       int weight;
+       int ret = -ENOMEM, cpu;
+       int index = 0;
+
+       for_each_possible_cpu(cpu) {
+
+               cpumask = topology_core_cpumask(cpu);
+               weight = cpumask_weight(cpumask);
+
+               /*
+                * This condition makes the first cpu belonging to the
+                * cluster to create a cooling device and allocates
+                * the structure. Others CPUs belonging to the same
+                * cluster will just increment the refcount on the
+                * cooling device structure and initialize it.
+                */
+               if (cpu == cpumask_first(cpumask)) {
+
+                       np = of_cpu_device_node_get(cpu);
+
+                       idle_cdev = kzalloc(sizeof(*idle_cdev), GFP_KERNEL);
+                       if (!idle_cdev)
+                               goto out_fail;
+
+                       idle_cdev->tsk = kzalloc(sizeof(*idle_cdev->tsk) *
+                                                weight, GFP_KERNEL);
+                       if (!idle_cdev->tsk)
+                               goto out_fail;
+
+                       idle_cdev->waitq = kzalloc(sizeof(*idle_cdev->waitq) *
+                                                  weight, GFP_KERNEL);
+                       if (!idle_cdev->waitq)
+                               goto out_fail;
+
+                       idle_cdev->idle_cycle = DEFAULT_IDLE_TIME_US;
+
+                       atomic_set(&idle_cdev->count, 0);
+
+                       kref_init(&idle_cdev->kref);
+
+                       /*
+                        * Initialize the timer to wakeup all the idle
+                        * injection tasks
+                        */
+                       hrtimer_init(&idle_cdev->timer,
+                                    CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+                       /*
+                        * The wakeup function callback which is in
+                        * charge of waking up all CPUs belonging to
+                        * the same cluster
+                        */
+                       idle_cdev->timer.function = cpuidle_cooling_wakeup_fn;
+
+                       /*
+                        * The thermal cooling device name
+                        */
+                       snprintf(dev_name, sizeof(dev_name), "thermal-idle-%d", 
index++);
+                       cdev = thermal_of_cooling_device_register(np, dev_name,
+                                                                 idle_cdev,
+                                                                 
&cpuidle_cooling_ops);
+                       if (IS_ERR(cdev)) {
+                               ret = PTR_ERR(cdev);
+                               goto out_fail;
+                       }
+
+                       idle_cdev->cdev = cdev;
+
+                       idle_cdev->cpumask = cpumask;
+
+                       list_add(&idle_cdev->node, &cpuidle_cdev_list);
+
+                       pr_info("Created idle cooling device for cpus 
'%*pbl'\n",
+                               cpumask_pr_args(cpumask));
+               }
+
+               kref_get(&idle_cdev->kref);
+
+               /*
+                * Each cooling device is per package. Each package
+                * has a set of cpus where the physical number is
+                * duplicate in the kernel namespace. We need a way to
+                * address the waitq[] and tsk[] arrays with index
+                * which are not Linux cpu numbered.
+                *
+                * One solution is to use the
+                * topology_core_id(cpu). Other solution is to use the
+                * modulo.
+                *
+                * eg. 2 x cluster - 4 cores.
+                *
+                * Physical numbering -> Linux numbering -> % nr_cpus
+                *
+                * Pkg0 - Cpu0 -> 0 -> 0
+                * Pkg0 - Cpu1 -> 1 -> 1
+                * Pkg0 - Cpu2 -> 2 -> 2
+                * Pkg0 - Cpu3 -> 3 -> 3
+                *
+                * Pkg1 - Cpu0 -> 4 -> 0
+                * Pkg1 - Cpu1 -> 5 -> 1
+                * Pkg1 - Cpu2 -> 6 -> 2
+                * Pkg1 - Cpu3 -> 7 -> 3
+                */
+               init_waitqueue_head(&idle_cdev->waitq[cpu % weight]);
+
+               tsk = kthread_create_on_cpu(cpuidle_cooling_injection_thread,
+                                           idle_cdev, cpu, "kidle_inject/%u");
+               if (IS_ERR(tsk)) {
+                       ret = PTR_ERR(tsk);
+                       goto out_fail;
+               }
+
+               idle_cdev->tsk[cpu % weight] = tsk;
+
+               wake_up_process(tsk);
+       }
+
+       return 0;
+
+out_fail:
+       list_for_each_entry(idle_cdev, &cpuidle_cdev_list, node) {
+
+               for_each_cpu(cpu, idle_cdev->cpumask) {
+
+                       if (idle_cdev->tsk[cpu])
+                               kthread_stop(idle_cdev->tsk[cpu]);
+
+                       kref_put(&idle_cdev->kref, cpuidle_cooling_release);
+               }
+       }
+
+       pr_err("Failed to create idle cooling device (%d)\n", ret);
+
+       return ret;
+}
+#endif
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index d4292eb..2b5950b 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -45,6 +45,7 @@ struct thermal_cooling_device *
  cpufreq_power_cooling_register(struct cpufreq_policy *policy,
                               u32 capacitance, get_static_t plat_static_func);
+extern int cpuidle_cooling_register(void);
  /**
   * of_cpufreq_cooling_register - create cpufreq cooling device based on DT.
   * @np: a valid struct device_node to the cooling device device tree node.
@@ -118,6 +119,11 @@ void cpufreq_cooling_unregister(struct 
thermal_cooling_device *cdev)
  {
        return;
  }
+
+static inline int cpuidle_cooling_register(void)
+{
+       return 0;
+}
  #endif        /* CONFIG_CPU_THERMAL */
#endif /* __CPU_COOLING_H__ */

Reply via email to