From: Martin KaFai Lau [mailto:ka...@fb.com] > >When doing ebpf+kprobe on some hot TCP functions (e.g. >tcp_rcv_established), the kprobe_dispatcher shows up in 'perf report'. > >In kprobe_dispatcher(), there is a lot of cache bouncing >in 'tk->nhit++'. 'tk->nhit' and 'tk->tp.flags' also share >the same cacheline. > >perf report (cycles:pp): >8.30% ipv4_dst_check >4.74% copy_user_enhanced_fast_string >3.93% dst_release >2.80% tcp_v4_rcv >2.31% queued_spin_lock_slowpath >2.30% _raw_spin_lock >1.88% mlx4_en_process_rx_cq >1.84% eth_get_headlen >1.81% ip_rcv_finish >~~~~ >1.71% kprobe_dispatcher >~~~~ >1.55% mlx4_en_xmit >1.09% __probe_kernel_read > >perf report after patch: >9.15% ipv4_dst_check >5.00% copy_user_enhanced_fast_string >4.12% dst_release >2.96% tcp_v4_rcv >2.50% _raw_spin_lock >2.39% queued_spin_lock_slowpath >2.11% eth_get_headlen >2.03% mlx4_en_process_rx_cq >1.69% mlx4_en_xmit >1.19% ip_rcv_finish >1.12% __probe_kernel_read >1.02% ehci_hcd_cleanup
Good catch! Looks good to me! Acked-by: Masami Hiramatsu <masami.hiramatsu...@hitachi.com> Thanks!! > >Signed-off-by: Martin KaFai Lau <ka...@fb.com> >--- > kernel/trace/trace_kprobe.c | 19 +++++++++++++++---- > 1 file changed, 15 insertions(+), 4 deletions(-) > >diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c >index c995644..91a441b 100644 >--- a/kernel/trace/trace_kprobe.c >+++ b/kernel/trace/trace_kprobe.c >@@ -30,7 +30,7 @@ > struct trace_kprobe { > struct list_head list; > struct kretprobe rp; /* Use rp.kp for kprobe use */ >- unsigned long nhit; >+ unsigned long * __percpu nhit; > const char *symbol; /* symbol name */ > struct trace_probe tp; > }; >@@ -274,6 +274,10 @@ static struct trace_kprobe *alloc_trace_kprobe(const char >*group, > if (!tk) > return ERR_PTR(ret); > >+ tk->nhit = alloc_percpu(unsigned long); >+ if (!tk->nhit) >+ goto error; >+ > if (symbol) { > tk->symbol = kstrdup(symbol, GFP_KERNEL); > if (!tk->symbol) >@@ -313,6 +317,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char >*group, > error: > kfree(tk->tp.call.name); > kfree(tk->symbol); >+ free_percpu(tk->nhit); > kfree(tk); > return ERR_PTR(ret); > } >@@ -327,6 +332,7 @@ static void free_trace_kprobe(struct trace_kprobe *tk) > kfree(tk->tp.call.class->system); > kfree(tk->tp.call.name); > kfree(tk->symbol); >+ free_percpu(tk->nhit); > kfree(tk); > } > >@@ -874,9 +880,14 @@ static const struct file_operations kprobe_events_ops = { > static int probes_profile_seq_show(struct seq_file *m, void *v) > { > struct trace_kprobe *tk = v; >+ unsigned long nhit = 0; >+ int cpu; >+ >+ for_each_possible_cpu(cpu) >+ nhit += *per_cpu_ptr(tk->nhit, cpu); > > seq_printf(m, " %-44s %15lu %15lu\n", >- trace_event_name(&tk->tp.call), tk->nhit, >+ trace_event_name(&tk->tp.call), nhit, > tk->rp.kp.nmissed); > > return 0; >@@ -1225,7 +1236,7 @@ static int kprobe_dispatcher(struct kprobe *kp, struct >pt_regs *regs) > { > struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); > >- tk->nhit++; >+ raw_cpu_inc(*tk->nhit); > > if (tk->tp.flags & TP_FLAG_TRACE) > kprobe_trace_func(tk, regs); >@@ -1242,7 +1253,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, >struct pt_regs *regs) > { > struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp); > >- tk->nhit++; >+ raw_cpu_inc(*tk->nhit); > > if (tk->tp.flags & TP_FLAG_TRACE) > kretprobe_trace_func(tk, ri, regs); >-- >2.5.1