Re: [PATCH] thermal: Add debugfs support for cooling devices
On Thu, Jan 04, 2018 at 12:32:04PM +0530, Viresh Kumar wrote: > This implements the debugfs interface for thermal cooling devices and > exposes some pretty useful statistics. These statistics have proven to > be quite useful specially while doing benchmarks related to the task > scheduler, where we want to make sure that nothing has disrupted the > test, specially the cooling device which may have put constraints on the > CPUs. The information exposed here tells us to what extent the CPUs were > constrained by the thermal framework. > > The read-only "transitions" file is per cooling device and it shows the > total number of cooling state transitions the device has gone through > since the time the cooling device is registered or the time when > statistics were reset last. > > The read-only "time_in_state/stateN" file is per cooling state and it > shows the time spent by the device in the respective cooling state. > > The write-only "reset" file is used to reset the statistics. > > This is how the directory structure looks like for a single cooling > device: > > $ ls -R /sys/kernel/debug/thermal/ > /sys/kernel/debug/thermal/: > cooling_device0 > > /sys/kernel/debug/thermal/cooling_device0: > reset time_in_state_ms transitions > > /sys/kernel/debug/thermal/cooling_device0/time_in_state_ms: > state0 state1 state2 state3 I would prefer this to go into sysfs. The main reason is that such stats are also useful on production systems, especially for collecting data on how a policy / deployment behaves across the production population. Cheers, > > This is tested on ARM 32-bit Hisilicon hikey620 board running Ubuntu and > ARM 64-bit Hisilicon hikey960 board running Android. 
> > Signed-off-by: Viresh Kumar > --- > drivers/thermal/Makefile | 1 + > drivers/thermal/thermal_core.c| 6 ++ > drivers/thermal/thermal_core.h| 18 > drivers/thermal/thermal_debugfs.c | 167 > ++ > drivers/thermal/thermal_helpers.c | 5 +- > drivers/thermal/thermal_sysfs.c | 1 + > include/linux/thermal.h | 1 + > 7 files changed, 198 insertions(+), 1 deletion(-) > create mode 100644 drivers/thermal/thermal_debugfs.c > > diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile > index 610344eb3e03..629f74e73871 100644 > --- a/drivers/thermal/Makefile > +++ b/drivers/thermal/Makefile > @@ -6,6 +6,7 @@ > obj-$(CONFIG_THERMAL)+= thermal_sys.o > thermal_sys-y+= thermal_core.o thermal_sysfs.o \ > thermal_helpers.o > +obj-$(CONFIG_DEBUG_FS) += thermal_debugfs.o > > # interface to/from other layers providing sensors > thermal_sys-$(CONFIG_THERMAL_HWMON) += thermal_hwmon.o > diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c > index 2b1b0ba393a4..bcc34648580f 100644 > --- a/drivers/thermal/thermal_core.c > +++ b/drivers/thermal/thermal_core.c > @@ -997,6 +997,8 @@ __thermal_cooling_device_register(struct device_node *np, > THERMAL_EVENT_UNSPECIFIED); > mutex_unlock(&thermal_list_lock); > > + thermal_cooling_device_setup_debugfs(cdev); > + > return cdev; > } > > @@ -1104,6 +1106,7 @@ void thermal_cooling_device_unregister(struct > thermal_cooling_device *cdev) > > mutex_unlock(&thermal_list_lock); > > + thermal_cooling_device_remove_debugfs(cdev); > ida_simple_remove(&thermal_cdev_ida, cdev->id); > device_unregister(&cdev->device); > } > @@ -1544,6 +1547,8 @@ static int __init thermal_init(void) > pr_warn("Thermal: Can not register suspend notifier, return > %d\n", > result); > > + thermal_debugfs_register(); > + > return 0; > > exit_netlink: > @@ -1563,6 +1568,7 @@ static int __init thermal_init(void) > > static void __exit thermal_exit(void) > { > + thermal_debugfs_unregister(); > unregister_pm_notifier(&thermal_pm_nb); > of_thermal_destroy_zones(); > genetlink_exit(); > diff --git 
a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h > index 27e3b1df7360..3a8d50aa32dc 100644 > --- a/drivers/thermal/thermal_core.h > +++ b/drivers/thermal/thermal_core.h > @@ -151,4 +151,22 @@ of_thermal_get_trip_points(struct thermal_zone_device > *tz) > } > #endif > > +#ifdef CONFIG_DEBUG_FS > +void thermal_debugfs_register(void); > +void thermal_debugfs_unregister(void); > +void thermal_cooling_device_setup_debugfs(struct thermal_cooling_device > *cdev); > +void thermal_cooling_device_remove_debugfs(struct thermal_cooling_device > *cdev); > +void thermal_cooling_device_debugfs_update(struct thermal_cooling_device > *cdev, > +unsigned long new_state); > +#else > +static inline void thermal_debugfs_register(void) {} > +static inline void thermal_debugfs_unregister(void) {} > +static inline void >
Re: [PATCH] thermal: Add debugfs support for cooling devices
On Thu, Jan 04, 2018 at 12:32:04PM +0530, Viresh Kumar wrote: > This implements the debugfs interface for thermal cooling devices and > exposes some pretty useful statistics. These statistics have proven to > be quite useful specially while doing benchmarks related to the task > scheduler, where we want to make sure that nothing has disrupted the > test, specially the cooling device which may have put constraints on the > CPUs. The information exposed here tells us to what extent the CPUs were > constrained by the thermal framework. > > The read-only "transitions" file is per cooling device and it shows the > total number of cooling state transitions the device has gone through > since the time the cooling device is registered or the time when > statistics were reset last. > > The read-only "time_in_state/stateN" file is per cooling state and it > shows the time spent by the device in the respective cooling state. > > The write-only "reset" file is used to reset the statistics. > > This is how the directory structure looks like for a single cooling > device: > > $ ls -R /sys/kernel/debug/thermal/ > /sys/kernel/debug/thermal/: > cooling_device0 > > /sys/kernel/debug/thermal/cooling_device0: > reset time_in_state_ms transitions > > /sys/kernel/debug/thermal/cooling_device0/time_in_state_ms: > state0 state1 state2 state3 I would prefer this to go into sysfs. The main reason is that such stats are also useful on production systems, especially for collecting data on how a policy / deployment behaves across the production population. Cheers, > > This is tested on ARM 32-bit Hisilicon hikey620 board running Ubuntu and > ARM 64-bit Hisilicon hikey960 board running Android. 
> > Signed-off-by: Viresh Kumar > --- > drivers/thermal/Makefile | 1 + > drivers/thermal/thermal_core.c| 6 ++ > drivers/thermal/thermal_core.h| 18 > drivers/thermal/thermal_debugfs.c | 167 > ++ > drivers/thermal/thermal_helpers.c | 5 +- > drivers/thermal/thermal_sysfs.c | 1 + > include/linux/thermal.h | 1 + > 7 files changed, 198 insertions(+), 1 deletion(-) > create mode 100644 drivers/thermal/thermal_debugfs.c > > diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile > index 610344eb3e03..629f74e73871 100644 > --- a/drivers/thermal/Makefile > +++ b/drivers/thermal/Makefile > @@ -6,6 +6,7 @@ > obj-$(CONFIG_THERMAL)+= thermal_sys.o > thermal_sys-y+= thermal_core.o thermal_sysfs.o \ > thermal_helpers.o > +obj-$(CONFIG_DEBUG_FS) += thermal_debugfs.o > > # interface to/from other layers providing sensors > thermal_sys-$(CONFIG_THERMAL_HWMON) += thermal_hwmon.o > diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c > index 2b1b0ba393a4..bcc34648580f 100644 > --- a/drivers/thermal/thermal_core.c > +++ b/drivers/thermal/thermal_core.c > @@ -997,6 +997,8 @@ __thermal_cooling_device_register(struct device_node *np, > THERMAL_EVENT_UNSPECIFIED); > mutex_unlock(&thermal_list_lock); > > + thermal_cooling_device_setup_debugfs(cdev); > + > return cdev; > } > > @@ -1104,6 +1106,7 @@ void thermal_cooling_device_unregister(struct > thermal_cooling_device *cdev) > > mutex_unlock(&thermal_list_lock); > > + thermal_cooling_device_remove_debugfs(cdev); > ida_simple_remove(&thermal_cdev_ida, cdev->id); > device_unregister(&cdev->device); > } > @@ -1544,6 +1547,8 @@ static int __init thermal_init(void) > pr_warn("Thermal: Can not register suspend notifier, return > %d\n", > result); > > + thermal_debugfs_register(); > + > return 0; > > exit_netlink: > @@ -1563,6 +1568,7 @@ static int __init thermal_init(void) > > static void __exit thermal_exit(void) > { > + thermal_debugfs_unregister(); > unregister_pm_notifier(&thermal_pm_nb); > of_thermal_destroy_zones(); > genetlink_exit(); > diff 
--git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h > index 27e3b1df7360..3a8d50aa32dc 100644 > --- a/drivers/thermal/thermal_core.h > +++ b/drivers/thermal/thermal_core.h > @@ -151,4 +151,22 @@ of_thermal_get_trip_points(struct thermal_zone_device > *tz) > } > #endif > > +#ifdef CONFIG_DEBUG_FS > +void thermal_debugfs_register(void); > +void thermal_debugfs_unregister(void); > +void thermal_cooling_device_setup_debugfs(struct thermal_cooling_device > *cdev); > +void thermal_cooling_device_remove_debugfs(struct thermal_cooling_device > *cdev); > +void thermal_cooling_device_debugfs_update(struct thermal_cooling_device > *cdev, > +unsigned long new_state); > +#else > +static inline void thermal_debugfs_register(void) {} > +static inline void thermal_debugfs_unregister(void) {} > +static inline void > +thermal_cooling_device_setup_debugfs(struct
[PATCH] thermal: Add debugfs support for cooling devices
This implements the debugfs interface for thermal cooling devices and exposes some pretty useful statistics. These statistics have proven to be quite useful, especially while doing benchmarks related to the task scheduler, where we want to make sure that nothing has disrupted the test, especially the cooling device, which may have put constraints on the CPUs. The information exposed here tells us to what extent the CPUs were constrained by the thermal framework. The read-only "transitions" file is per cooling device and it shows the total number of cooling state transitions the device has gone through since the cooling device was registered or since the statistics were last reset. The read-only "time_in_state_ms/stateN" file is per cooling state and it shows the time spent by the device in the respective cooling state. The write-only "reset" file is used to reset the statistics. This is what the directory structure looks like for a single cooling device: $ ls -R /sys/kernel/debug/thermal/ /sys/kernel/debug/thermal/: cooling_device0 /sys/kernel/debug/thermal/cooling_device0: reset time_in_state_ms transitions /sys/kernel/debug/thermal/cooling_device0/time_in_state_ms: state0 state1 state2 state3 This was tested on an ARM 32-bit Hisilicon hikey620 board running Ubuntu and an ARM 64-bit Hisilicon hikey960 board running Android. 
Signed-off-by: Viresh Kumar --- drivers/thermal/Makefile | 1 + drivers/thermal/thermal_core.c| 6 ++ drivers/thermal/thermal_core.h| 18 drivers/thermal/thermal_debugfs.c | 167 ++ drivers/thermal/thermal_helpers.c | 5 +- drivers/thermal/thermal_sysfs.c | 1 + include/linux/thermal.h | 1 + 7 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 drivers/thermal/thermal_debugfs.c diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 610344eb3e03..629f74e73871 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_THERMAL) += thermal_sys.o thermal_sys-y += thermal_core.o thermal_sysfs.o \ thermal_helpers.o +obj-$(CONFIG_DEBUG_FS) += thermal_debugfs.o # interface to/from other layers providing sensors thermal_sys-$(CONFIG_THERMAL_HWMON)+= thermal_hwmon.o diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 2b1b0ba393a4..bcc34648580f 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -997,6 +997,8 @@ __thermal_cooling_device_register(struct device_node *np, THERMAL_EVENT_UNSPECIFIED); mutex_unlock(&thermal_list_lock); + thermal_cooling_device_setup_debugfs(cdev); + return cdev; } @@ -1104,6 +1106,7 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) mutex_unlock(&thermal_list_lock); + thermal_cooling_device_remove_debugfs(cdev); ida_simple_remove(&thermal_cdev_ida, cdev->id); device_unregister(&cdev->device); } @@ -1544,6 +1547,8 @@ static int __init thermal_init(void) pr_warn("Thermal: Can not register suspend notifier, return %d\n", result); + thermal_debugfs_register(); + return 0; exit_netlink: @@ -1563,6 +1568,7 @@ static int __init thermal_init(void) static void __exit thermal_exit(void) { + thermal_debugfs_unregister(); unregister_pm_notifier(&thermal_pm_nb); of_thermal_destroy_zones(); genetlink_exit(); diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 27e3b1df7360..3a8d50aa32dc 100644 --- 
a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -151,4 +151,22 @@ of_thermal_get_trip_points(struct thermal_zone_device *tz) } #endif +#ifdef CONFIG_DEBUG_FS +void thermal_debugfs_register(void); +void thermal_debugfs_unregister(void); +void thermal_cooling_device_setup_debugfs(struct thermal_cooling_device *cdev); +void thermal_cooling_device_remove_debugfs(struct thermal_cooling_device *cdev); +void thermal_cooling_device_debugfs_update(struct thermal_cooling_device *cdev, + unsigned long new_state); +#else +static inline void thermal_debugfs_register(void) {} +static inline void thermal_debugfs_unregister(void) {} +static inline void +thermal_cooling_device_setup_debugfs(struct thermal_cooling_device *cdev) {} +static inline void +thermal_cooling_device_remove_debugfs(struct thermal_cooling_device *cdev) {} +static inline void +thermal_cooling_device_debugfs_update(struct thermal_cooling_device *cdev, + unsigned long new_state) {} +#endif /* debugfs */ #endif /* __THERMAL_CORE_H__ */ diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c new file mode 100644 index
[PATCH] thermal: Add debugfs support for cooling devices
This implements the debugfs interface for thermal cooling devices and exposes some pretty useful statistics. These statistics have proven to be quite useful, especially while doing benchmarks related to the task scheduler, where we want to make sure that nothing has disrupted the test, especially the cooling device, which may have put constraints on the CPUs. The information exposed here tells us to what extent the CPUs were constrained by the thermal framework. The read-only "transitions" file is per cooling device and it shows the total number of cooling state transitions the device has gone through since the cooling device was registered or since the statistics were last reset. The read-only "time_in_state_ms/stateN" file is per cooling state and it shows the time spent by the device in the respective cooling state. The write-only "reset" file is used to reset the statistics. This is what the directory structure looks like for a single cooling device: $ ls -R /sys/kernel/debug/thermal/ /sys/kernel/debug/thermal/: cooling_device0 /sys/kernel/debug/thermal/cooling_device0: reset time_in_state_ms transitions /sys/kernel/debug/thermal/cooling_device0/time_in_state_ms: state0 state1 state2 state3 This was tested on an ARM 32-bit Hisilicon hikey620 board running Ubuntu and an ARM 64-bit Hisilicon hikey960 board running Android. 
Signed-off-by: Viresh Kumar --- drivers/thermal/Makefile | 1 + drivers/thermal/thermal_core.c| 6 ++ drivers/thermal/thermal_core.h| 18 drivers/thermal/thermal_debugfs.c | 167 ++ drivers/thermal/thermal_helpers.c | 5 +- drivers/thermal/thermal_sysfs.c | 1 + include/linux/thermal.h | 1 + 7 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 drivers/thermal/thermal_debugfs.c diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 610344eb3e03..629f74e73871 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_THERMAL) += thermal_sys.o thermal_sys-y += thermal_core.o thermal_sysfs.o \ thermal_helpers.o +obj-$(CONFIG_DEBUG_FS) += thermal_debugfs.o # interface to/from other layers providing sensors thermal_sys-$(CONFIG_THERMAL_HWMON)+= thermal_hwmon.o diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 2b1b0ba393a4..bcc34648580f 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -997,6 +997,8 @@ __thermal_cooling_device_register(struct device_node *np, THERMAL_EVENT_UNSPECIFIED); mutex_unlock(&thermal_list_lock); + thermal_cooling_device_setup_debugfs(cdev); + return cdev; } @@ -1104,6 +1106,7 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) mutex_unlock(&thermal_list_lock); + thermal_cooling_device_remove_debugfs(cdev); ida_simple_remove(&thermal_cdev_ida, cdev->id); device_unregister(&cdev->device); } @@ -1544,6 +1547,8 @@ static int __init thermal_init(void) pr_warn("Thermal: Can not register suspend notifier, return %d\n", result); + thermal_debugfs_register(); + return 0; exit_netlink: @@ -1563,6 +1568,7 @@ static int __init thermal_init(void) static void __exit thermal_exit(void) { + thermal_debugfs_unregister(); unregister_pm_notifier(&thermal_pm_nb); of_thermal_destroy_zones(); genetlink_exit(); diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 27e3b1df7360..3a8d50aa32dc 100644 --- 
a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -151,4 +151,22 @@ of_thermal_get_trip_points(struct thermal_zone_device *tz) } #endif +#ifdef CONFIG_DEBUG_FS +void thermal_debugfs_register(void); +void thermal_debugfs_unregister(void); +void thermal_cooling_device_setup_debugfs(struct thermal_cooling_device *cdev); +void thermal_cooling_device_remove_debugfs(struct thermal_cooling_device *cdev); +void thermal_cooling_device_debugfs_update(struct thermal_cooling_device *cdev, + unsigned long new_state); +#else +static inline void thermal_debugfs_register(void) {} +static inline void thermal_debugfs_unregister(void) {} +static inline void +thermal_cooling_device_setup_debugfs(struct thermal_cooling_device *cdev) {} +static inline void +thermal_cooling_device_remove_debugfs(struct thermal_cooling_device *cdev) {} +static inline void +thermal_cooling_device_debugfs_update(struct thermal_cooling_device *cdev, + unsigned long new_state) {} +#endif /* debugfs */ #endif /* __THERMAL_CORE_H__ */ diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c new file mode 100644 index ..077684197250 --- /dev/null +++