Re: [PATCH V2] tracing/timerlat: Hotplug support for the user-space interface

2023-09-24 Thread kernel test robot



Hello,

kernel test robot noticed "WARNING:possible_recursive_locking_detected" on:

commit: d2618f89d9ede5e14fd2eb2d140fff1d31f4948b ("[PATCH V2] tracing/timerlat: 
Hotplug support for the user-space interface")
url: 
https://github.com/intel-lab-lkp/linux/commits/Daniel-Bristot-de-Oliveira/tracing-timerlat-Hotplug-support-for-the-user-space-interface/20230920-234738
base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git 
2cf0f715623872823a72e451243bbf555d10d032
patch link: 
https://lore.kernel.org/all/6b9a5f306e488bc77bf8521faeade420a0adf3e4.1695224204.git.bris...@kernel.org/
patch subject: [PATCH V2] tracing/timerlat: Hotplug support for the user-space 
interface

in testcase: boot

compiler: gcc-12
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

(please refer to attached dmesg/kmsg for entire log/backtrace)



If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-lkp/202309242253.a3803da4-oliver.s...@intel.com


[  108.795325][T1] WARNING: possible recursive locking detected
[  108.796115][T1] 6.6.0-rc2-00019-gd2618f89d9ed #1 Not tainted
[  108.796898][T1] 
[  108.797664][T1] swapper/0/1 is trying to acquire lock:
[ 108.798384][ T1] 85550630 (cpu_hotplug_lock){}-{0:0}, at: 
__cpuhp_setup_state (kernel/cpu.c:2553) 
[  108.799656][T1]
[  108.799656][T1] but task is already holding lock:
[ 108.800596][ T1] 85550630 (cpu_hotplug_lock){}-{0:0}, at: 
init_osnoise_tracer (kernel/trace/trace_osnoise.c:3186) 
[  108.801843][T1]
[  108.801843][T1] other info that might help us debug this:
[  108.802890][T1]  Possible unsafe locking scenario:
[  108.802890][T1]
[  108.803843][T1]CPU0
[  108.804266][T1]
[  108.804685][T1]   lock(cpu_hotplug_lock);
[  108.805267][T1]   lock(cpu_hotplug_lock);
[  108.805845][T1]
[  108.805845][T1]  *** DEADLOCK ***
[  108.805845][T1]
[  108.806885][T1]  May be due to missing lock nesting notation
[  108.806885][T1]
[  108.807952][T1] 1 lock held by swapper/0/1:
[ 108.808547][ T1] #0: 85550630 (cpu_hotplug_lock){}-{0:0}, at: 
init_osnoise_tracer (kernel/trace/trace_osnoise.c:3186) 
[  108.809838][T1]
[  108.809838][T1] stack backtrace:
[  108.819007][T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 
6.6.0-rc2-00019-gd2618f89d9ed #1
[  108.821026][T1] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 
BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[  108.823537][T1] Call Trace:
[  108.824310][T1]  
[ 108.824999][ T1] dump_stack_lvl (lib/dump_stack.c:107 (discriminator 4)) 
[ 108.826053][ T1] validate_chain (kernel/locking/lockdep.c:3857) 
[ 108.827124][ T1] ? __pfx_validate_chain (kernel/locking/lockdep.c:3824) 
[ 108.828310][ T1] ? mark_lock (kernel/locking/lockdep.c:4655 (discriminator 
3)) 
[ 108.829327][ T1] __lock_acquire (kernel/locking/lockdep.c:5136) 
[ 108.830430][ T1] ? __cpuhp_setup_state (kernel/cpu.c:2553) 
[ 108.831632][ T1] lock_acquire (kernel/locking/lockdep.c:467 
kernel/locking/lockdep.c:5755 kernel/locking/lockdep.c:5718) 
[ 108.832576][ T1] ? __cpuhp_setup_state (kernel/cpu.c:2553) 
[ 108.833259][ T1] ? __pfx_lock_acquire (kernel/locking/lockdep.c:5721) 
[ 108.833944][ T1] ? __pfx___might_resched (kernel/sched/core.c:10142) 
[ 108.834708][ T1] ? preempt_latency_start (include/linux/ftrace.h:974 
kernel/sched/core.c:5825 kernel/sched/core.c:5822) 
[ 108.835443][ T1] ? __pfx_init_osnoise_tracer 
(kernel/trace/trace_osnoise.c:3162) 
[ 108.836185][ T1] cpus_read_lock (include/linux/percpu-rwsem.h:53 
kernel/cpu.c:489) 
[ 108.836774][ T1] ? __cpuhp_setup_state (kernel/cpu.c:2553) 
[ 108.837437][ T1] __cpuhp_setup_state (kernel/cpu.c:2553) 
[ 108.838089][ T1] ? __pfx_osnoise_cpu_die (kernel/trace/trace_osnoise.c:2168) 
[ 108.839689][ T1] ? __pfx_osnoise_cpu_init (kernel/trace/trace_osnoise.c:2159) 
[ 108.840399][ T1] init_osnoise_tracer (kernel/trace/trace_osnoise.c:2180 
kernel/trace/trace_osnoise.c:3187) 
[ 108.841052][ T1] do_one_initcall (init/main.c:1232) 
[ 108.841674][ T1] ? __pfx_do_one_initcall (init/main.c:1223) 
[ 108.842388][ T1] do_initcalls (init/main.c:1293 init/main.c:1310) 
[ 108.842971][ T1] kernel_init_freeable (init/main.c:1549) 
[ 108.843629][ T1] ? __pfx_kernel_init (init/main.c:1429) 
[ 108.844265][ T1] kernel_init (init/main.c:1439) 
[ 108.844816][ T1] ? __pfx_kernel_init (init/main.c:1429) 
[ 108.845440][ T1] ret_from_fork (arch/x86/kernel/process.c:153) 
[ 108.846002][ T1] ? __pfx_kernel_init (init/main.c:1429) 
[ 108.846634][ T1] ret_from_fork_asm (arch/x86/entry/entry_64.S:312) 
[  108.847259][T1]  
[  108.884553][T1] Loading compiled-in X.509 certificates
[  109.254386][T1] kmemleak: Kernel memory leak detector initialized 

[PATCH V2] tracing/timerlat: Hotplug support for the user-space interface

2023-09-20 Thread Daniel Bristot de Oliveira
The osnoise/per_cpu/CPU$/timerlat_fd is create for each possible
CPU, but it might create confusion if the CPU is not online.

Create the file only for online CPUs, also follow hotplug by
creating and deleting as CPUs come and go.

Fixes: e88ed227f639 ("tracing/timerlat: Add user-space interface")
Signed-off-by: Daniel Bristot de Oliveira 
---
  Changes from V1:
- Fix compilation issue when !HOTPLUG
- Fix init interface | hotplug race
Link: 
https://lore.kernel.org/lkml/b619d9fd08a3bb47018cf40afa95783844a3c1fd.1694789910.git.bris...@kernel.org/

 kernel/trace/trace_osnoise.c | 112 +++
 1 file changed, 86 insertions(+), 26 deletions(-)

diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index bd0d01d00fb9..8422562d9864 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -229,6 +229,19 @@ static inline struct osnoise_variables 
*this_cpu_osn_var(void)
 }
 
 #ifdef CONFIG_TIMERLAT_TRACER
+
+/*
+ * osnoise/per_cpu dir
+ */
+static struct dentry   *osnoise_per_cpu_fd;
+
+struct osnoise_per_cpu_dir {
+   struct dentry *root;
+   struct dentry *timerlat_fd;
+};
+
+static DEFINE_PER_CPU(struct osnoise_per_cpu_dir, osnoise_per_cpu_dir);
+
 /*
  * Runtime information for the timer mode.
  */
@@ -2000,6 +2013,9 @@ static int start_kthread(unsigned int cpu)
char comm[24];
 
if (timerlat_enabled()) {
+   if (!test_bit(OSN_WORKLOAD, &osnoise_options))
+   return 0;
+
snprintf(comm, 24, "timerlat/%d", cpu);
main = timerlat_main;
} else {
@@ -2065,19 +2081,64 @@ static int start_per_cpu_kthreads(void)
return retval;
 }
 
+#ifdef CONFIG_TIMERLAT_TRACER
+static const struct file_operations timerlat_fd_fops;
+static int timerlat_add_per_cpu_interface(long cpu)
+{
+   struct dentry *timerlat_fd, *cpu_dir_fd;
+   char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */
+
+   if (!osnoise_per_cpu_fd)
+   return 0;
+
+   snprintf(cpu_str, 30, "cpu%ld", cpu);
+   cpu_dir_fd = tracefs_create_dir(cpu_str, osnoise_per_cpu_fd);
+
+   if (cpu_dir_fd) {
+   timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ,
+  cpu_dir_fd, NULL, 
&timerlat_fd_fops);
+   WARN_ON_ONCE(!timerlat_fd);
+   per_cpu_ptr(&osnoise_per_cpu_dir, cpu)->root = cpu_dir_fd;
+   per_cpu_ptr(&osnoise_per_cpu_dir, cpu)->timerlat_fd = 
timerlat_fd;
+
+   /* Record the CPU */
+   d_inode(timerlat_fd)->i_cdev = (void *)(cpu);
+
+   return 0;
+   }
+
+   return -ENOMEM;
+}
+
+static void timerlat_rm_per_cpu_interface(long cpu)
+{
+   struct dentry *cpu_dir = per_cpu_ptr(&osnoise_per_cpu_dir, cpu)->root;
+
+   if (cpu_dir) {
+   tracefs_remove(cpu_dir);
+   per_cpu_ptr(&osnoise_per_cpu_dir, cpu)->root = NULL;
+   per_cpu_ptr(&osnoise_per_cpu_dir, cpu)->timerlat_fd = NULL;
+   }
+}
+#elif defined(CONFIG_HOTPLUG_CPU)
+static int timerlat_add_per_cpu_interface(long cpu) { return 0; };
+static void timerlat_rm_per_cpu_interface(long cpu) {};
+#endif
+
 #ifdef CONFIG_HOTPLUG_CPU
 static void osnoise_hotplug_workfn(struct work_struct *dummy)
 {
unsigned int cpu = smp_processor_id();
 
mutex_lock(&trace_types_lock);
-
-   if (!osnoise_has_registered_instances())
-   goto out_unlock_trace;
-
mutex_lock(&interface_lock);
cpus_read_lock();
 
+   timerlat_add_per_cpu_interface(cpu);
+
+   if (!osnoise_has_registered_instances())
+   goto out_unlock;
+
if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
goto out_unlock;
 
@@ -2086,7 +2147,6 @@ static void osnoise_hotplug_workfn(struct work_struct 
*dummy)
 out_unlock:
cpus_read_unlock();
mutex_unlock(&interface_lock);
-out_unlock_trace:
mutex_unlock(&trace_types_lock);
 }
 
@@ -2106,6 +2166,7 @@ static int osnoise_cpu_init(unsigned int cpu)
  */
 static int osnoise_cpu_die(unsigned int cpu)
 {
+   timerlat_rm_per_cpu_interface(cpu);
stop_kthread(cpu);
return 0;
 }
@@ -2708,10 +2769,7 @@ static int init_timerlat_stack_tracefs(struct dentry 
*top_dir)
 
 static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir)
 {
-   struct dentry *timerlat_fd;
-   struct dentry *per_cpu;
-   struct dentry *cpu_dir;
-   char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */
+   int retval;
long cpu;
 
/*
@@ -2720,29 +2778,24 @@ static int osnoise_create_cpu_timerlat_fd(struct dentry 
*top_dir)
 * Because osnoise/timerlat have a single workload, having
 * multiple files like these are wast of memory.
 */
-   per_cpu = tracefs_create_dir("per_cpu", top_dir);
-   if (!per_cpu)
+   osnoise_per_cpu_fd