Avoid running the wakeup irq_work on an isolated CPU. Since the wakeup can
run on any CPU, let's pick a housekeeping CPU to do the job.

This change reduces noise when tracing isolated CPUs. For example, the
following ipi_send_cpu stack trace was captured with nohz_full=2 on the
isolated CPU:

          <idle>-0       [002] d.h4.  1255.379293: ipi_send_cpu: cpu=2 callsite=irq_work_queue+0x2d/0x50 callback=rb_wake_up_waiters+0x0/0x80
          <idle>-0       [002] d.h4.  1255.379329: <stack trace>
 => trace_event_raw_event_ipi_send_cpu
 => __irq_work_queue_local
 => irq_work_queue
 => ring_buffer_unlock_commit
 => trace_buffer_unlock_commit_regs
 => trace_event_buffer_commit
 => trace_event_raw_event_x86_irq_vector
 => __sysvec_apic_timer_interrupt
 => sysvec_apic_timer_interrupt
 => asm_sysvec_apic_timer_interrupt
 => pv_native_safe_halt
 => default_idle
 => default_idle_call
 => do_idle
 => cpu_startup_entry
 => start_secondary
 => common_startup_64

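For reference, the CPU isolation assumed above is configured on the
kernel command line; a minimal sketch, assuming CPU 2 is the one to be
isolated (the exact flags may vary by kernel version):

  nohz_full=2 isolcpus=nohz,domain,2

With such a setup, CPU 2 is excluded from the HK_TYPE_KERNEL_NOISE
housekeeping mask, so housekeeping_any_cpu() does not select it.
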
The IRQ work interrupt alone adds considerable noise, but the impact can
get even worse with PREEMPT_RT, because the IRQ work interrupt is then
handled by a separate kernel thread. This requires a task switch and makes
tracing useless for analyzing latency on an isolated CPU.

After applying the patch, a similar trace can still be captured, but
ipi_send_cpu now always targets a housekeeping (non-isolated) CPU.
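
This can be verified with the ipi_send_cpu tracepoint; a minimal sketch
of the tracefs setup, assuming the tracepoint and the stacktrace trace
option are available on the running kernel:

  cd /sys/kernel/tracing
  echo stacktrace > trace_options
  echo 1 > events/ipi/ipi_send_cpu/enable
  cat trace_pipe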

Unfortunately, irq_work_queue_on() is not NMI-safe. When running in NMI
context, fall back to queuing the irq work on the local CPU.

Signed-off-by: Petr Tesarik <[email protected]>
---
 kernel/trace/ring_buffer.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 41c9f5d079beb..5cf4c6baa2771 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,6 +4,7 @@
  *
  * Copyright (C) 2008 Steven Rostedt <[email protected]>
  */
+#include <linux/sched/isolation.h>
 #include <linux/trace_recursion.h>
 #include <linux/trace_events.h>
 #include <linux/ring_buffer.h>
@@ -4011,19 +4012,36 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer)
        rb_end_commit(cpu_buffer);
 }
 
+static bool
+rb_irq_work_queue(struct rb_irq_work *irq_work)
+{
+       int cpu;
+
+       /* irq_work_queue_on() is not NMI-safe */
+       if (unlikely(in_nmi()))
+               return irq_work_queue(&irq_work->work);
+
+       /*
+        * If CPU isolation is not active, cpu is always the current
+        * CPU, and the following is equivalent to irq_work_queue().
+        */
+       cpu = housekeeping_any_cpu(HK_TYPE_KERNEL_NOISE);
+       return irq_work_queue_on(&irq_work->work, cpu);
+}
+
 static __always_inline void
 rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
 {
        if (buffer->irq_work.waiters_pending) {
                buffer->irq_work.waiters_pending = false;
                /* irq_work_queue() supplies it's own memory barriers */
-               irq_work_queue(&buffer->irq_work.work);
+               rb_irq_work_queue(&buffer->irq_work);
        }
 
        if (cpu_buffer->irq_work.waiters_pending) {
                cpu_buffer->irq_work.waiters_pending = false;
                /* irq_work_queue() supplies it's own memory barriers */
-               irq_work_queue(&cpu_buffer->irq_work.work);
+               rb_irq_work_queue(&cpu_buffer->irq_work);
        }
 
        if (cpu_buffer->last_pages_touch == local_read(&cpu_buffer->pages_touched))
@@ -4043,7 +4061,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
        cpu_buffer->irq_work.wakeup_full = true;
        cpu_buffer->irq_work.full_waiters_pending = false;
        /* irq_work_queue() supplies it's own memory barriers */
-       irq_work_queue(&cpu_buffer->irq_work.work);
+       rb_irq_work_queue(&cpu_buffer->irq_work);
 }
 
 #ifdef CONFIG_RING_BUFFER_RECORD_RECURSION
-- 
2.52.0

