Re: x86_pmu_start WARN_ON.
On Mon, 24 Feb 2014, Peter Zijlstra wrote: > On Fri, Feb 21, 2014 at 03:18:38PM -0500, Vince Weaver wrote: > > I've applied the patch and have been unable to trigger the warning with > > either my testcase or a few hours of fuzzing. > > Yay. > > > My only comment on the patch is it could always use some comments. > > > > The perf_event code is really hard to follow as is, without adding > > more uncommented special cases. > > Does the below help a bit? Or is there anywhere in particular you want > more comments? yes, every little bit helps. While chasing these fuzzer-related bugs I end up deep in the perf_event code and many of the routines have no comments at all. Eventually I have to dig out the K+R book to figure out order precendece of ++ prefix operators, have at least 2-3 different files open in editors, plus a bunch of firefox tabs open to http://lxr.free-electrons.com, and even then I misunderstand the code a lot. Vince > > --- > Subject: perf, x86: Add a few more comments > From: Peter Zijlstra > Date: Mon Feb 24 12:26:21 CET 2014 > > Add a few comments on the ->add(), ->del() and ->*_txn() > implementation. > > Requested-by: Vince Weaver > Signed-off-by: Peter Zijlstra > --- > arch/x86/kernel/cpu/perf_event.c | 49 > +++ > arch/x86/kernel/cpu/perf_event.h |8 +++--- > 2 files changed, 39 insertions(+), 18 deletions(-) > > --- a/arch/x86/kernel/cpu/perf_event.c > +++ b/arch/x86/kernel/cpu/perf_event.c > @@ -892,7 +892,6 @@ static void x86_pmu_enable(struct pmu *p >* hw_perf_group_sched_in() or x86_pmu_enable() >* >* step1: save events moving to new counters > - * step2: reprogram moved events into new counters >*/ > for (i = 0; i < n_running; i++) { > event = cpuc->event_list[i]; > @@ -918,6 +917,9 @@ static void x86_pmu_enable(struct pmu *p > x86_pmu_stop(event, PERF_EF_UPDATE); > } > > + /* > + * step2: reprogram moved events into new counters > + */ > for (i = 0; i < cpuc->n_events; i++) { > event = cpuc->event_list[i]; > hwc = &event->hw; > @@ -1043,7 +1045,7 @@ static int x86_pmu_add(struct perf_event > /* >* If group events scheduling transaction was started, >* skip the schedulability test here, it will be performed > - * at commit time (->commit_txn) as a whole > + * at commit time (->commit_txn) as a whole. >*/ > if (cpuc->group_flag & PERF_EVENT_TXN) > goto done_collect; > @@ -1058,6 +1060,10 @@ static int x86_pmu_add(struct perf_event > memcpy(cpuc->assign, assign, n*sizeof(int)); > > done_collect: > + /* > + * Commit the collect_events() state. See x86_pmu_del() and > + * x86_pmu_*_txn(). > + */ > cpuc->n_events = n; > cpuc->n_added += n - n0; > cpuc->n_txn += n - n0; > @@ -1183,28 +1189,38 @@ static void x86_pmu_del(struct perf_even >* If we're called during a txn, we don't need to do anything. >* The events never got scheduled and ->cancel_txn will truncate >* the event_list. > + * > + * XXX assumes any ->del() called during a TXN will only be on > + * an event added during that same TXN. >*/ > if (cpuc->group_flag & PERF_EVENT_TXN) > return; > > + /* > + * Not a TXN, therefore cleanup properly. > + */ > x86_pmu_stop(event, PERF_EF_UPDATE); > > for (i = 0; i < cpuc->n_events; i++) { > - if (event == cpuc->event_list[i]) { > - > - if (i >= cpuc->n_events - cpuc->n_added) > - --cpuc->n_added; > + if (event == cpuc->event_list[i]) > + break; > + } > > - if (x86_pmu.put_event_constraints) > - x86_pmu.put_event_constraints(cpuc, event); > + if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without > ->add() ? */ > + return; > > - while (++i < cpuc->n_events) > - cpuc->event_list[i-1] = cpuc->event_list[i]; > + /* If we have a newly added event; make sure to decrease n_added. */ > + if (i >= cpuc->n_events - cpuc->n_added) > + --cpuc->n_added; > + > + if (x86_pmu.put_event_constraints) > + x86_pmu.put_event_constraints(cpuc, event); > + > + /* Delete the array entry. */ > + while (++i < cpuc->n_events) > + cpuc->event_list[i-1] = cpuc->event_list[i]; > + --cpuc->n_events; > > - --cpuc->n_events; > - break; > - } > - } > perf_event_update_userpage(event); > } > > @@ -1598,7 +1614,8 @@ static void x86_pmu_cancel_txn(struct pm > { > __this
Re: x86_pmu_start WARN_ON.
On Fri, Feb 21, 2014 at 03:18:38PM -0500, Vince Weaver wrote: > I've applied the patch and have been unable to trigger the warning with > either my testcase or a few hours of fuzzing. Yay. > My only comment on the patch is it could always use some comments. > > The perf_event code is really hard to follow as is, without adding > more uncommented special cases. Does the below help a bit? Or is there anywhere in particular you want more comments? --- Subject: perf, x86: Add a few more comments From: Peter Zijlstra Date: Mon Feb 24 12:26:21 CET 2014 Add a few comments on the ->add(), ->del() and ->*_txn() implementation. Requested-by: Vince Weaver Signed-off-by: Peter Zijlstra --- arch/x86/kernel/cpu/perf_event.c | 49 +++ arch/x86/kernel/cpu/perf_event.h |8 +++--- 2 files changed, 39 insertions(+), 18 deletions(-) --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -892,7 +892,6 @@ static void x86_pmu_enable(struct pmu *p * hw_perf_group_sched_in() or x86_pmu_enable() * * step1: save events moving to new counters -* step2: reprogram moved events into new counters */ for (i = 0; i < n_running; i++) { event = cpuc->event_list[i]; @@ -918,6 +917,9 @@ static void x86_pmu_enable(struct pmu *p x86_pmu_stop(event, PERF_EF_UPDATE); } + /* +* step2: reprogram moved events into new counters +*/ for (i = 0; i < cpuc->n_events; i++) { event = cpuc->event_list[i]; hwc = &event->hw; @@ -1043,7 +1045,7 @@ static int x86_pmu_add(struct perf_event /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be performed -* at commit time (->commit_txn) as a whole +* at commit time (->commit_txn) as a whole. */ if (cpuc->group_flag & PERF_EVENT_TXN) goto done_collect; @@ -1058,6 +1060,10 @@ static int x86_pmu_add(struct perf_event memcpy(cpuc->assign, assign, n*sizeof(int)); done_collect: + /* +* Commit the collect_events() state. See x86_pmu_del() and +* x86_pmu_*_txn(). +*/ cpuc->n_events = n; cpuc->n_added += n - n0; cpuc->n_txn += n - n0; @@ -1183,28 +1189,38 @@ static void x86_pmu_del(struct perf_even * If we're called during a txn, we don't need to do anything. * The events never got scheduled and ->cancel_txn will truncate * the event_list. +* +* XXX assumes any ->del() called during a TXN will only be on +* an event added during that same TXN. */ if (cpuc->group_flag & PERF_EVENT_TXN) return; + /* +* Not a TXN, therefore cleanup properly. +*/ x86_pmu_stop(event, PERF_EF_UPDATE); for (i = 0; i < cpuc->n_events; i++) { - if (event == cpuc->event_list[i]) { - - if (i >= cpuc->n_events - cpuc->n_added) - --cpuc->n_added; + if (event == cpuc->event_list[i]) + break; + } - if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(cpuc, event); + if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */ + return; - while (++i < cpuc->n_events) - cpuc->event_list[i-1] = cpuc->event_list[i]; + /* If we have a newly added event; make sure to decrease n_added. */ + if (i >= cpuc->n_events - cpuc->n_added) + --cpuc->n_added; + + if (x86_pmu.put_event_constraints) + x86_pmu.put_event_constraints(cpuc, event); + + /* Delete the array entry. */ + while (++i < cpuc->n_events) + cpuc->event_list[i-1] = cpuc->event_list[i]; + --cpuc->n_events; - --cpuc->n_events; - break; - } - } perf_event_update_userpage(event); } @@ -1598,7 +1614,8 @@ static void x86_pmu_cancel_txn(struct pm { __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); /* -* Truncate the collected events. +* Truncate collected array by the number of events added in this +* transaction. See x86_pmu_add() and x86_pmu_*_txn(). */ __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); @@ -1609,6 +1626,8 @@ static void x86_pmu_cancel_txn(struct pm * Commit group events scheduling transaction * Perform the group schedulability test as a
Re: x86_pmu_start WARN_ON.
On Fri, 21 Feb 2014, Peter Zijlstra wrote: > group_sched_in() that fails (for whatever reason), and without x86_pmu > TXN support (because the leader is !x86_pmu), will corrupt the n_added > state. > > If this all is correct; the below ought to cure things. I've applied the patch and have been unable to trigger the warning with either my testcase or a few hours of fuzzing. My only comment on the patch is it could always use some comments. The perf_event code is really hard to follow as is, without adding more uncommented special cases. Vince > > Signed-off-by: Peter Zijlstra > --- > arch/x86/kernel/cpu/perf_event.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/arch/x86/kernel/cpu/perf_event.c > b/arch/x86/kernel/cpu/perf_event.c > index 895604f2e916..79f9f848bee4 100644 > --- a/arch/x86/kernel/cpu/perf_event.c > +++ b/arch/x86/kernel/cpu/perf_event.c > @@ -1192,6 +1192,9 @@ static void x86_pmu_del(struct perf_event *event, int > flags) > for (i = 0; i < cpuc->n_events; i++) { > if (event == cpuc->event_list[i]) { > > + if (i >= cpuc->n_events - cpuc->n_added) > + --cpuc->n_added; > + > if (x86_pmu.put_event_constraints) > x86_pmu.put_event_constraints(cpuc, event); > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, Feb 20, 2014 at 07:23:00PM +0100, Peter Zijlstra wrote: > This is I think the relevant bit: > >pec_1076_warn-2804 [000] d... 147.926153: x86_pmu_disable: > x86_pmu_disable >pec_1076_warn-2804 [000] d... 147.926153: x86_pmu_state: Events: { >pec_1076_warn-2804 [000] d... 147.926156: x86_pmu_state: 0: state: .R > config: ( (null)) >pec_1076_warn-2804 [000] d... 147.926158: x86_pmu_state: 33: state: > AR config: 0 (88011ac99800) >pec_1076_warn-2804 [000] d... 147.926159: x86_pmu_state: } >pec_1076_warn-2804 [000] d... 147.926160: x86_pmu_state: n_events: 1, > n_added: 0, n_txn: 1 >pec_1076_warn-2804 [000] d... 147.926161: x86_pmu_state: Assignment: { >pec_1076_warn-2804 [000] d... 147.926162: x86_pmu_state: 0->33 tag: 1 > config: 0 (88011ac99800) >pec_1076_warn-2804 [000] d... 147.926163: x86_pmu_state: } >pec_1076_warn-2804 [000] d... 147.926166: collect_events: Adding event: > 1 (880119ec8800) So we add the insn:p event (fd[23]) At this point we should have: n_events = 2, n_added = 1, n_txn = 1 >pec_1076_warn-2804 [000] d... 147.926170: collect_events: Adding event: > 0 (8800c9e01800) >pec_1076_warn-2804 [000] d... 147.926172: collect_events: Adding event: > 4 (8800cbab2c00) We try and add the {BP,cycles,br_insn} group (fd[3], fd[4], fd[15]). These events are 0:cycles and 4:br_insn, the BP event isn't x86_pmu so that's not visible. group_sched_in() pmu->start_txn() /* nop - BP pmu */ event_sched_in() event->pmu->add() So here we should end up with: 0: n_events = 3, n_added = 2, n_txn = 2 4: n_events = 4, n_added = 3, n_txn = 3 But seeing the below state on x86_pmu_enable(), the must have failed, because the 0 and 4 events aren't there anymore. Looking at group_sched_in(), since the BP is the leader, its event_sched_in() must have succeeded, for otherwise we would not have seen the sibling adds. But since neither 0 or 4 are in the below state; their event_sched_in() must have failed; but I don't see why, the complete state: 0,0,1:p,4 fits perfectly fine on a core2. However, since we try and schedule 4 it means the 0 event must have succeeded! Therefore the 4 event must have failed, its failure will have put group_sched_in() into the fail path, which will call: event_sched_out() event->pmu->del() on 0 and the BP event. Now x86_pmu_del() will reduce n_events; but it will not reduce n_added; giving what we see below: n_event = 2, n_added = 2, n_txn = 2 >pec_1076_warn-2804 [000] d... 147.926177: x86_pmu_enable: x86_pmu_enable >pec_1076_warn-2804 [000] d... 147.926177: x86_pmu_state: Events: { >pec_1076_warn-2804 [000] d... 147.926179: x86_pmu_state: 0: state: .R > config: ( (null)) >pec_1076_warn-2804 [000] d... 147.926181: x86_pmu_state: 33: state: > AR config: 0 (88011ac99800) >pec_1076_warn-2804 [000] d... 147.926182: x86_pmu_state: } >pec_1076_warn-2804 [000] d... 147.926184: x86_pmu_state: n_events: 2, > n_added: 2, n_txn: 2 >pec_1076_warn-2804 [000] d... 147.926184: x86_pmu_state: Assignment: { >pec_1076_warn-2804 [000] d... 147.926186: x86_pmu_state: 0->33 tag: 1 > config: 0 (88011ac99800) >pec_1076_warn-2804 [000] d... 147.926188: x86_pmu_state: 1->0 tag: 1 > config: 1 (880119ec8800) >pec_1076_warn-2804 [000] d... 147.926188: x86_pmu_state: } >pec_1076_warn-2804 [000] d... 147.926190: x86_pmu_enable: S0: hwc->idx: > 33, hwc->last_cpu: 0, hwc->last_tag: 1 hwc->state: 0 So the problem is that x86_pmu_del(), when called from a group_sched_in() that fails (for whatever reason), and without x86_pmu TXN support (because the leader is !x86_pmu), will corrupt the n_added state. If this all is correct; the below ought to cure things. Signed-off-by: Peter Zijlstra --- arch/x86/kernel/cpu/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 895604f2e916..79f9f848bee4 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1192,6 +1192,9 @@ static void x86_pmu_del(struct perf_event *event, int flags) for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { + if (i >= cpuc->n_events - cpuc->n_added) + --cpuc->n_added; + if (x86_pmu.put_event_constraints) x86_pmu.put_event_constraints(cpuc, event); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
and the perf_fuzzer overnight triggered this possibly related warning in x86_pmu_stop() I assume it's this code (the line numbers don't match up for some reason). if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { x86_pmu.disable(event); cpuc->events[hwc->idx] = NULL; WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; } so possibly also related to PERF_HES_STOPPED? [25526.987821] [ cut here ] [25526.988047] WARNING: CPU: 1 PID: 3849 at arch/x86/kernel/cpu/perf_event.c:1221 x86_pmu_stop+0x71/0xa6() [25526.988047] Modules linked in: cpufreq_userspace cpufreq_stats cpufreq_powersave cpufreq_conservative f71882fg mcs7830 usbnet ohci_pci evdev video wmi psmouse serio_raw coretemp i2c_nforce2 ohci_hcd acpi_cpufreq processor pcspkr thermal_sys button sg ehci_pci ehci_hcd sd_mod usbcore usb_common [25526.988047] CPU: 1 PID: 3849 Comm: perf_fuzzer Tainted: GW 3.14.0-rc3+ #17 [25526.988047] Hardware name: AOpen DE7000/nMCP7ALPx-DE R1.06 Oct.19.2012, BIOS 080015 10/19/2012 [25526.988047] 04c5 88011fc83ca8 81530d11 04c5 [25526.988047] 88011fc83ce8 8103fce4 88011fc83d28 [25526.988047] 81012ca0 8800c9be6000 0004 88011fc8b940 [25526.988047] Call Trace: [25526.988047][] dump_stack+0x49/0x60 [25526.988047] [] warn_slowpath_common+0x81/0x9b [25526.988047] [] ? x86_pmu_stop+0x71/0xa6 [25526.988047] [] warn_slowpath_null+0x1a/0x1c [25526.988047] [] x86_pmu_stop+0x71/0xa6 [25526.988047] [] x86_pmu_del+0x3d/0xb8 [25526.988047] [] event_sched_out+0x92/0x121 [25526.988047] [] group_sched_in+0xf7/0x147 [25526.988047] [] ctx_sched_in+0x15e/0x185 [25526.988047] [] perf_event_sched_in+0x69/0x71 [25526.988047] [] perf_cpu_hrtimer_handler+0x13a/0x1b0 [25526.988047] [] __run_hrtimer+0xba/0x145 [25526.988047] [] ? __perf_install_in_context+0xf0/0xf0 [25526.988047] [] hrtimer_interrupt+0xd5/0x1c3 [25526.988047] [] local_apic_timer_interrupt+0x58/0x5d [25526.988047] [] smp_trace_apic_timer_interrupt+0x53/0x91 [25526.988047] [] trace_apic_timer_interrupt+0x6a/0x70 [25526.988047][] ? system_call_fastpath+0x1a/0x1f [25526.988047] ---[ end trace d5252ca58550eb43 ]--- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, 20 Feb 2014, Vince Weaver wrote: > On Thu, 20 Feb 2014, Vince Weaver wrote: > > > Might be relevant: check the last_cpu values. Right before the above > > it looks like the thread gets moved from CPU 1 to CPU 0 > > (possibly as a result of the long chain started with the > > close() of the tracepoint event), > > so the problem NMI watchdog event being enabled is a different one than > > the one that was disabled just before. > > so is this a false warning? If you get scheduled to a new CPU > and there's an already running CPU-wide event, is that OK? > > Or should x86_pmu_disable() be setting PERF_HES_STOPPED on all events? > It looks like other architectures are (such as armpmu_stop() ). an actually I take that back, other architectures don't. It's just confusing how perf_pmu_disable() calls pmu->pmu_disable() which on arm is: armpmu_disable() which calls: armpmu->stop() which is *not* the same as: armpmu_stop() but is actually armpmu->pmu->stop() Urgh. Vince -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, 20 Feb 2014, Vince Weaver wrote: > Might be relevant: check the last_cpu values. Right before the above > it looks like the thread gets moved from CPU 1 to CPU 0 > (possibly as a result of the long chain started with the > close() of the tracepoint event), > so the problem NMI watchdog event being enabled is a different one than > the one that was disabled just before. so is this a false warning? If you get scheduled to a new CPU and there's an already running CPU-wide event, is that OK? Or should x86_pmu_disable() be setting PERF_HES_STOPPED on all events? It looks like other architectures are (such as armpmu_stop() ). Vince -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, 20 Feb 2014 19:15:38 +0100 Peter Zijlstra wrote: > I think by using the /debug/tracing/events/ftrace/function event, but > I'm not actually sure, I've never used it nor did I write the code to do > it. Jolsa did all that IIRC. > > All I know is that we had some 'fun' bugs around there sometime back. Note, the function tracer now has a lot better recursion protection than it use to. -- Steve -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, 20 Feb 2014, Peter Zijlstra wrote: > On Thu, Feb 20, 2014 at 01:03:16PM -0500, Vince Weaver wrote: > > attached, it's not very big. > > This is I think the relevant bit: > >pec_1076_warn-2804 [000] d... 147.926153: x86_pmu_disable: > x86_pmu_disable >pec_1076_warn-2804 [000] d... 147.926153: x86_pmu_state: Events: { >pec_1076_warn-2804 [000] d... 147.926156: x86_pmu_state: 0: state: .R > config: ( (null)) >pec_1076_warn-2804 [000] d... 147.926158: x86_pmu_state: 33: state: > AR config: 0 (88011ac99800) >pec_1076_warn-2804 [000] d... 147.926159: x86_pmu_state: } >pec_1076_warn-2804 [000] d... 147.926160: x86_pmu_state: n_events: 1, > n_added: 0, n_txn: 1 >pec_1076_warn-2804 [000] d... 147.926161: x86_pmu_state: Assignment: { >pec_1076_warn-2804 [000] d... 147.926162: x86_pmu_state: 0->33 tag: 1 > config: 0 (88011ac99800) >pec_1076_warn-2804 [000] d... 147.926163: x86_pmu_state: } >pec_1076_warn-2804 [000] d... 147.926166: collect_events: Adding event: > 1 (880119ec8800) >pec_1076_warn-2804 [000] d... 147.926170: collect_events: Adding event: > 0 (8800c9e01800) >pec_1076_warn-2804 [000] d... 147.926172: collect_events: Adding event: > 4 (8800cbab2c00) >pec_1076_warn-2804 [000] d... 147.926177: x86_pmu_enable: x86_pmu_enable >pec_1076_warn-2804 [000] d... 147.926177: x86_pmu_state: Events: { >pec_1076_warn-2804 [000] d... 147.926179: x86_pmu_state: 0: state: .R > config: ( (null)) >pec_1076_warn-2804 [000] d... 147.926181: x86_pmu_state: 33: state: > AR config: 0 (88011ac99800) >pec_1076_warn-2804 [000] d... 147.926182: x86_pmu_state: } >pec_1076_warn-2804 [000] d... 147.926184: x86_pmu_state: n_events: 2, > n_added: 2, n_txn: 2 >pec_1076_warn-2804 [000] d... 147.926184: x86_pmu_state: Assignment: { >pec_1076_warn-2804 [000] d... 147.926186: x86_pmu_state: 0->33 tag: 1 > config: 0 (88011ac99800) >pec_1076_warn-2804 [000] d... 147.926188: x86_pmu_state: 1->0 tag: 1 > config: 1 (880119ec8800) >pec_1076_warn-2804 [000] d... 147.926188: x86_pmu_state: } >pec_1076_warn-2804 [000] d... 147.926190: x86_pmu_enable: S0: hwc->idx: > 33, hwc->last_cpu: 0, hwc->last_tag: 1 hwc->state: 0 >pec_1076_warn-2804 [000] d... 147.926191: x86_pmu_enable: starting: 0 > > so it does indeed look like n_added got scrambled; we started out with 1 > event on disable; we've got 2 events on enable, but n_added is also 2, > which would suggest we had 0 on disable. > > That makes us want to (re)start the NMI counter alright. Might be relevant: check the last_cpu values. Right before the above it looks like the thread gets moved from CPU 1 to CPU 0 (possibly as a result of the long chain started with the close() of the tracepoint event), so the problem NMI watchdog event being enabled is a different one than the one that was disabled just before. Vince -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, Feb 20, 2014 at 07:15:38PM +0100, Peter Zijlstra wrote: > On Thu, Feb 20, 2014 at 09:31:19AM -0800, Andi Kleen wrote: > > Peter Zijlstra writes: > > > > > > It will; trace_printk() works without -pg, I think you didn't read the > > > instructions very well. > > > > Ok, you enable and disable it again. I won't guess why you do that. > > To grow the trace buffers; it starts with just a few pages per cpu; once > you switch to an actual tracer it allocates a sensible amount. > > You can grow it with another interface; but then I'd have to like > remember what that was and how big the normal buffers are. Simply > toggling between tracers is far easier. I see. > > > > And there's a very good reason not to apply your patch; you can route > > > the function tracer into perf, guess what happens when perf calls the > > > function tracer again :-) > > > > How? > > I think by using the /debug/tracing/events/ftrace/function event, but > I'm not actually sure, I've never used it nor did I write the code to do > it. Jolsa did all that IIRC. > > All I know is that we had some 'fun' bugs around there sometime back. Ok. I don't think it would be a problem in any case, the ftrace code has recursion protection. -Andi -- a...@linux.intel.com -- Speaking for myself only. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, Feb 20, 2014 at 01:03:16PM -0500, Vince Weaver wrote: > attached, it's not very big. This is I think the relevant bit: pec_1076_warn-2804 [000] d... 147.926153: x86_pmu_disable: x86_pmu_disable pec_1076_warn-2804 [000] d... 147.926153: x86_pmu_state: Events: { pec_1076_warn-2804 [000] d... 147.926156: x86_pmu_state: 0: state: .R config: ( (null)) pec_1076_warn-2804 [000] d... 147.926158: x86_pmu_state: 33: state: AR config: 0 (88011ac99800) pec_1076_warn-2804 [000] d... 147.926159: x86_pmu_state: } pec_1076_warn-2804 [000] d... 147.926160: x86_pmu_state: n_events: 1, n_added: 0, n_txn: 1 pec_1076_warn-2804 [000] d... 147.926161: x86_pmu_state: Assignment: { pec_1076_warn-2804 [000] d... 147.926162: x86_pmu_state: 0->33 tag: 1 config: 0 (88011ac99800) pec_1076_warn-2804 [000] d... 147.926163: x86_pmu_state: } pec_1076_warn-2804 [000] d... 147.926166: collect_events: Adding event: 1 (880119ec8800) pec_1076_warn-2804 [000] d... 147.926170: collect_events: Adding event: 0 (8800c9e01800) pec_1076_warn-2804 [000] d... 147.926172: collect_events: Adding event: 4 (8800cbab2c00) pec_1076_warn-2804 [000] d... 147.926177: x86_pmu_enable: x86_pmu_enable pec_1076_warn-2804 [000] d... 147.926177: x86_pmu_state: Events: { pec_1076_warn-2804 [000] d... 147.926179: x86_pmu_state: 0: state: .R config: ( (null)) pec_1076_warn-2804 [000] d... 147.926181: x86_pmu_state: 33: state: AR config: 0 (88011ac99800) pec_1076_warn-2804 [000] d... 147.926182: x86_pmu_state: } pec_1076_warn-2804 [000] d... 147.926184: x86_pmu_state: n_events: 2, n_added: 2, n_txn: 2 pec_1076_warn-2804 [000] d... 147.926184: x86_pmu_state: Assignment: { pec_1076_warn-2804 [000] d... 147.926186: x86_pmu_state: 0->33 tag: 1 config: 0 (88011ac99800) pec_1076_warn-2804 [000] d... 147.926188: x86_pmu_state: 1->0 tag: 1 config: 1 (880119ec8800) pec_1076_warn-2804 [000] d... 147.926188: x86_pmu_state: } pec_1076_warn-2804 [000] d... 147.926190: x86_pmu_enable: S0: hwc->idx: 33, hwc->last_cpu: 0, hwc->last_tag: 1 hwc->state: 0 pec_1076_warn-2804 [000] d... 147.926191: x86_pmu_enable: starting: 0 so it does indeed look like n_added got scrambled; we started out with 1 event on disable; we've got 2 events on enable, but n_added is also 2, which would suggest we had 0 on disable. That makes us want to (re)start the NMI counter alright. I've got to go run errands for a bit; hopefully I can find a bit of time later tonight to look this over. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, Feb 20, 2014 at 12:46:12PM -0500, Steven Rostedt wrote: > On Thu, 20 Feb 2014 12:43:51 -0500 > Steven Rostedt wrote: > > > As a disable_trace_on_warning is more of a modification to the kernel, > > I'm leaning to adding a /proc/sys/kernel/ftrace_disable_on_warning > > file. This keeps it in line with ftrace_dump_on_oops, which is the most > > similar feature. > > Nevermind. There's a "traceoff_on_warning" already there that does > exactly this :-/ Bugger; I so missed that looking for it earlier today. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, Feb 20, 2014 at 09:31:19AM -0800, Andi Kleen wrote: > Peter Zijlstra writes: > > > > It will; trace_printk() works without -pg, I think you didn't read the > > instructions very well. > > Ok, you enable and disable it again. I won't guess why you do that. To grow the trace buffers; it starts with just a few pages per cpu; once you switch to an actual tracer it allocates a sensible amount. You can grow it with another interface; but then I'd have to like remember what that was and how big the normal buffers are. Simply toggling between tracers is far easier. > > And there's a very good reason not to apply your patch; you can route > > the function tracer into perf, guess what happens when perf calls the > > function tracer again :-) > > How? I think by using the /debug/tracing/events/ftrace/function event, but I'm not actually sure, I've never used it nor did I write the code to do it. Jolsa did all that IIRC. All I know is that we had some 'fun' bugs around there sometime back. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, 20 Feb 2014, Peter Zijlstra wrote: > On Wed, Feb 19, 2014 at 05:34:49PM -0500, Vince Weaver wrote: > > So where would the NMI counter event get disabled? Would it never get > > disabled, just because it's always running and always gets the same fixed > > slot? Why isn't this a problem all the time, not just with corner cases? > > Well it could get another counter assignment I suppose; there's no > guarantee it always ends up in the fixed counter although that's rather > likely. > > > Is somehow n_added getting confused? > > I'd expect worse than a warning if that happened, but who knows. > > You could try with the below; make sure you've got CONFIG_FUNCTION_TRACER=y. > # cat /debug/tracing/trace | bzip2 -9 > ~/trace.bz2 > > And send that (and look at it of course). attached, it's not very big. Vince trace.bz2 Description: Binary data
Re: x86_pmu_start WARN_ON.
On Thu, 20 Feb 2014 12:43:51 -0500 Steven Rostedt wrote: > As a disable_trace_on_warning is more of a modification to the kernel, > I'm leaning to adding a /proc/sys/kernel/ftrace_disable_on_warning > file. This keeps it in line with ftrace_dump_on_oops, which is the most > similar feature. Nevermind. There's a "traceoff_on_warning" already there that does exactly this :-/ -- Steve -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, 20 Feb 2014 18:00:18 +0100 Peter Zijlstra wrote: > On Thu, Feb 20, 2014 at 11:26:00AM -0500, Steven Rostedt wrote: > > On Thu, 20 Feb 2014 11:08:30 +0100 > > Peter Zijlstra wrote: > > > > > @rostedt: WTF is disable_trace_on_warning a boot option only? > > > > Laziness. > > > > > > I'll add a sysctl for it in 3.15. > > /debug/tracing/options/ was where I was looking first. > > Its a bit weird to have half the options in options/ and the other half > as sysctl in /proc/sys/kernel/ Yeah, that is somewhat strange. The /proc/sys/kernel/ ftrace options is somewhat historical. Things that were more about how ftrace works outside of the tracing word exists there. For example, ftrace_dump_on_oops is there, because it is about modifying the way the kernel works on crash and not how ftrace works. This is something similar, it's about how the kernel works, not how ftrace works. I try to have the debugfs options be ways to modify how ftrace works, like formatting, what it prints, etc. But things that modify the way the kernel work, I like to keep in /proc/sys/kernel. That is, ftrace_dump_on_oops is what happens on kernel crash, stack_tracer_enabled is something on the boarder. That is, it should probably have been in the tracing facility, as it is similar to something like the irqsoff tracer. But as it wasn't to be a tracer in itself, it was added to proc. The ftrace_enabled, is a big on off switch to enable or disable ALL function tracing. Even for perf and friends. I added a trace option "function-trace" that will just disable function tracing for ftrace and not affect other users of function tracing. As a disable_trace_on_warning is more of a modification to the kernel, I'm leaning to adding a /proc/sys/kernel/ftrace_disable_on_warning file. This keeps it in line with ftrace_dump_on_oops, which is the most similar feature. -- Steve -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
Peter Zijlstra writes: > > It will; trace_printk() works without -pg, I think you didn't read the > instructions very well. Ok, you enable and disable it again. I won't guess why you do that. > > And there's a very good reason not to apply your patch; you can route > the function tracer into perf, guess what happens when perf calls the > function tracer again :-) How? -Andi -- a...@linux.intel.com -- Speaking for myself only -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, Feb 20, 2014 at 11:26:00AM -0500, Steven Rostedt wrote: > On Thu, 20 Feb 2014 11:08:30 +0100 > Peter Zijlstra wrote: > > > @rostedt: WTF is disable_trace_on_warning a boot option only? > > Laziness. > > > I'll add a sysctl for it in 3.15. /debug/tracing/options/ was where I was looking first. Its a bit weird to have half the options in options/ and the other half as sysctl in /proc/sys/kernel/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, 20 Feb 2014 11:08:30 +0100 Peter Zijlstra wrote: > @rostedt: WTF is disable_trace_on_warning a boot option only? Laziness. I'll add a sysctl for it in 3.15. -- Steve -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
Peter Zijlstra writes: > On Wed, Feb 19, 2014 at 05:34:49PM -0500, Vince Weaver wrote: >> So where would the NMI counter event get disabled? Would it never get >> disabled, just because it's always running and always gets the same fixed >> slot? Why isn't this a problem all the time, not just with corner cases? > > Well it could get another counter assignment I suppose; there's no > guarantee it always ends up in the fixed counter although that's rather > likely. > >> Is somehow n_added getting confused? > > I'd expect worse than a warning if that happened, but who knows. > > You could try with the below; make sure you've got CONFIG_FUNCTION_TRACER=y. > > Then (assuming debugfs is mounted in /debug ; most modern distros have > it at /sys/kernel/debug but they're wrong!): > > # echo 0 > /debug/tracing/tracing_on > # echo function > /debug/tracing/current_tracer > # echo nop > /debug/tracing/current_tracer > # echo 0 > /debug/tracing/trace > # echo 1 > /debug/tracing/tracing_on > > And make it go *BOOM*, then: > > # cat /debug/tracing/trace | bzip2 -9 > ~/trace.bz2 This won't work unless you apply this patch http://lkml.org/lkml/2013/10/20/94 -Andi -- a...@linux.intel.com -- Speaking for myself only -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, Feb 20, 2014 at 07:47:23AM -0800, Andi Kleen wrote: > Peter Zijlstra writes: > > > On Wed, Feb 19, 2014 at 05:34:49PM -0500, Vince Weaver wrote: > >> So where would the NMI counter event get disabled? Would it never get > >> disabled, just because it's always running and always gets the same fixed > >> slot? Why isn't this a problem all the time, not just with corner cases? > > > > Well it could get another counter assignment I suppose; there's no > > guarantee it always ends up in the fixed counter although that's rather > > likely. > > > >> Is somehow n_added getting confused? > > > > I'd expect worse than a warning if that happened, but who knows. > > > > You could try with the below; make sure you've got CONFIG_FUNCTION_TRACER=y. > > > > Then (assuming debugfs is mounted in /debug ; most modern distros have > > it at /sys/kernel/debug but they're wrong!): > > > > # echo 0 > /debug/tracing/tracing_on > > # echo function > /debug/tracing/current_tracer > > # echo nop > /debug/tracing/current_tracer > > # echo 0 > /debug/tracing/trace > > # echo 1 > /debug/tracing/tracing_on > > > > And make it go *BOOM*, then: > > > > # cat /debug/tracing/trace | bzip2 -9 > ~/trace.bz2 > > This won't work unless you apply this patch > > http://lkml.org/lkml/2013/10/20/94 It will; trace_printk() works without -pg, I think you didn't read the instructions very well. And there's a very good reason not to apply your patch; you can route the function tracer into perf, guess what happens when perf calls the function tracer again :-) But yes; you can use it if you don't do that. But I didn't want the function trace, so who cares. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Wed, Feb 19, 2014 at 05:34:49PM -0500, Vince Weaver wrote: > So where would the NMI counter event get disabled? Would it never get > disabled, just because it's always running and always gets the same fixed > slot? Why isn't this a problem all the time, not just with corner cases? Well it could get another counter assignment I suppose; there's no guarantee it always ends up in the fixed counter although that's rather likely. > Is somehow n_added getting confused? I'd expect worse than a warning if that happened, but who knows. You could try with the below; make sure you've got CONFIG_FUNCTION_TRACER=y. Then (assuming debugfs is mounted in /debug ; most modern distros have it at /sys/kernel/debug but they're wrong!): # echo 0 > /debug/tracing/tracing_on # echo function > /debug/tracing/current_tracer # echo nop > /debug/tracing/current_tracer # echo 0 > /debug/tracing/trace # echo 1 > /debug/tracing/tracing_on And make it go *BOOM*, then: # cat /debug/tracing/trace | bzip2 -9 > ~/trace.bz2 And send that (and look at it of course). @rostedt: WTF is disable_trace_on_warning a boot option only? --- arch/x86/kernel/cpu/perf_event.c | 65 +++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 895604f2e916..d3edf301deb8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -509,6 +509,42 @@ void x86_pmu_disable_all(void) } } +static void x86_pmu_state(struct pmu *pmu) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int i; + + trace_printk("Events: {\n"); + for (i = 0; i < X86_PMC_IDX_MAX; i++) { + struct perf_event *event = cpuc->events[i]; + int active = test_bit(i, cpuc->active_mask); + int running = test_bit(i, cpuc->running); + + if (active || running) { + trace_printk(" %d: state: %c%c config: %lx (%p)\n", + i, active ? 'A' : '.', + running ? 'R' : '.', + event ? event->attr.config : -1L, + event); + } + } + trace_printk("}\n"); + + + trace_printk("n_events: %d, n_added: %d, n_txn: %d\n", + cpuc->n_events, cpuc->n_added, cpuc->n_txn); + trace_printk("Assignment: {\n"); + for (i = 0; i < cpuc->n_events; i++) { + struct perf_event *event = cpuc->event_list[i]; + + trace_printk(" %d->%d tag: %lx config: %lx (%p)\n", + i, cpuc->assign[i], cpuc->tags[i], + event ? event->attr.config : -1L, + event); + } + trace_printk("}\n"); +} + static void x86_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -524,6 +560,9 @@ static void x86_pmu_disable(struct pmu *pmu) barrier(); x86_pmu.disable_all(); + + trace_printk("x86_pmu_disable\n"); + x86_pmu_state(pmu); } void x86_pmu_enable_all(int added) @@ -820,6 +859,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, return -EINVAL; cpuc->event_list[n] = leader; n++; + trace_printk("Adding event: %lx (%p)\n", leader->attr.config, leader); } if (!dogrp) return n; @@ -834,6 +874,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, cpuc->event_list[n] = event; n++; + trace_printk("Adding event: %lx (%p)\n", event->attr.config, event); } return n; } @@ -885,6 +926,9 @@ static void x86_pmu_enable(struct pmu *pmu) if (cpuc->enabled) return; + trace_printk("x86_pmu_enable\n"); + x86_pmu_state(pmu); + if (cpuc->n_added) { int n_running = cpuc->n_events - cpuc->n_added; /* @@ -898,6 +942,11 @@ static void x86_pmu_enable(struct pmu *pmu) event = cpuc->event_list[i]; hwc = &event->hw; + trace_printk("K%d: hwc->idx: %d, hwc->last_cpu: %d," +" hwc->last_tag: %lx hwc->state: %x\n", +i, hwc->idx, hwc->last_cpu, +hwc->last_tag, hwc->state); + /* * we can avoid reprogramming counter if: * - assigned same counter as last time @@ -915,6 +964,8 @@ static void x86_pmu_enable(struct pmu *pmu) if (hwc->state & PERF_HES_STOPPED) hwc->state |= PERF_HES_ARCH;
Re: x86_pmu_start WARN_ON.
On Wed, 19 Feb 2014, Peter Zijlstra wrote: > So when we add a new event (or more) we compute a mapping from event to > counter. Then we disable all (pre existing) events that moved to a new > location, then we enable all events (insert HES_ARCH) that were running > but got relocated and the new events. > > Of course the code is horrible, but I think the above is what it does. The code is a pain, with all of the various ctx types, and the nested calls to perf_pmu_enable/perf_pmu_disable in particular. So I've hacked the code so that it shows the last location where hwc->state was cleared for an event (i.e., where it was last enabled). The NMI counter event was enabled in _perf_install_in_context (presumably at creation time) and as far as I can tell never disabled. Then when x86_pmu_enable/x86_pmu_start gets called at the end of _perf_event_context_sched_in() it is sad because the NMI counter event is not disabled. So where would the NMI counter event get disabled? Would it never get disabled, just because it's always running and always gets the same fixed slot? Why isn't this a problem all the time, not just with corner cases? Is somehow n_added getting confused? Vince -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Tue, Feb 18, 2014 at 05:20:57PM -0500, Vince Weaver wrote: > On Tue, 18 Feb 2014, Vince Weaver wrote: > > > On Mon, 17 Feb 2014, Peter Zijlstra wrote: > > > > > Enable CONFIG_FRAME_POINTER for better stack traces; I suspect the > > > list_del_event() is just random stack garbage. The path that makes sense > > > is: > > > wait_rcu()->__wait_for_common()->schedule_timeout() > > > > Here's an updated stack trace on 3.14-rc3 with CONFIG_FRAME_POINTER > > enabled, in case it's helpful: > > Still chasing this, although all I can add are these debug messages: > > [ 140.812003] PROBLEM: n_events=2 n_added=2 VMW: idx=33 state=f00 type=0 > config=0 samp_per=5e6069eb0 > [ 140.812003] ALL: VMW: Num=0 idx=33 state=f00 type=0 config=0 > samp_per=5e6069eb0 > [ 140.812003] ALL: VMW: Num=1 idx=0 state=3 type=0 config=1 samp_per=0 > > So when the WARN gets triggered there only only two events in the event > list, the NMI watchdog which has already been enabled somehow (that f00 > I stuck in, pmu_start sets it to f00 instead of 00 to make sure it wasn't > something stomping on memory) and the precise instructions event. > > I still have a hard time following what all the schedule in code is doing. Yes, I got it once, then promptly forgot it. It all became the thing it is because AMD Fam15 had some horrible constraints. So in general it tries to map events to counters in order of decreasing constraints (so it starts with the most constrained events). It all gets a bit funny due to overlapping constraints; see commit bc1738f6ee830 for a little blurb on what the overlap thing is about. So when we add a new event (or more) we compute a mapping from event to counter. Then we disable all (pre existing) events that moved to a new location, then we enable all events (insert HES_ARCH) that were running but got relocated and the new events. Of course the code is horrible, but I think the above is what it does. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Tue, 18 Feb 2014, Vince Weaver wrote: > On Mon, 17 Feb 2014, Peter Zijlstra wrote: > > > Enable CONFIG_FRAME_POINTER for better stack traces; I suspect the > > list_del_event() is just random stack garbage. The path that makes sense > > is: > > wait_rcu()->__wait_for_common()->schedule_timeout() > > Here's an updated stack trace on 3.14-rc3 with CONFIG_FRAME_POINTER > enabled, in case it's helpful: Still chasing this, although all I can add are these debug messages: [ 140.812003] PROBLEM: n_events=2 n_added=2 VMW: idx=33 state=f00 type=0 config=0 samp_per=5e6069eb0 [ 140.812003] ALL: VMW: Num=0 idx=33 state=f00 type=0 config=0 samp_per=5e6069eb0 [ 140.812003] ALL: VMW: Num=1 idx=0 state=3 type=0 config=1 samp_per=0 So when the WARN gets triggered there only only two events in the event list, the NMI watchdog which has already been enabled somehow (that f00 I stuck in, pmu_start sets it to f00 instead of 00 to make sure it wasn't something stomping on memory) and the precise instructions event. I still have a hard time following what all the schedule in code is doing. Vince -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Mon, 17 Feb 2014, Peter Zijlstra wrote: > Enable CONFIG_FRAME_POINTER for better stack traces; I suspect the > list_del_event() is just random stack garbage. The path that makes sense > is: > wait_rcu()->__wait_for_common()->schedule_timeout() Here's an updated stack trace on 3.14-rc3 with CONFIG_FRAME_POINTER enabled, in case it's helpful: [ 237.004035] [ cut here ] [ 237.008016] WARNING: CPU: 1 PID: 2791 at arch/x86/kernel/cpu/perf_event.c:1076 x86_pmu_start+0x41/0x113() [ 237.008016] Modules linked in: cpufreq_userspace cpufreq_stats cpufreq_powersave cpufreq_conservative f71882fg mcs7830 usbnet evdev wmi acpi_cpufreq video coretemp psmouse processor thermal_sys button ohci_pci ohci_hcd serio_raw pcspkr i2c_nforce2 sg ehci_pci ehci_hcd sd_mod usbcore usb_common [ 237.008016] CPU: 1 PID: 2791 Comm: pec_1076_warn Not tainted 3.14.0-rc3+ #10 [ 237.008016] Hardware name: AOpen DE7000/nMCP7ALPx-DE R1.06 Oct.19.2012, BIOS 080015 10/19/2012 [ 237.008016] 0434 8800cae6b9d8 8150e4ff 0434 [ 237.008016] 8800cae6ba18 8103d64e 8800cae6ba48 [ 237.008016] 81012b10 0021 88011b31dc00 88011fc8b940 [ 237.008016] Call Trace: [ 237.008016] [] dump_stack+0x49/0x62 [ 237.008016] [] warn_slowpath_common+0x7c/0x96 [ 237.008016] [] ? x86_pmu_start+0x41/0x113 [ 237.008016] [] warn_slowpath_null+0x15/0x17 [ 237.008016] [] x86_pmu_start+0x41/0x113 [ 237.008016] [] x86_pmu_enable+0x1d8/0x27d [ 237.008016] [] perf_pmu_enable+0x21/0x23 [ 237.008016] [] perf_event_context_sched_in+0x79/0x9a [ 237.008016] [] __perf_event_task_sched_in+0x26/0x10f [ 237.008016] [] ? idle_balance+0x125/0x161 [ 237.008016] [] finish_task_switch+0x41/0xa8 [ 237.008016] [] __schedule+0x48f/0x4e3 [ 237.008016] [] ? __hrtimer_start_range_ns+0x2e8/0x2fa [ 237.008016] [] schedule+0x6e/0x70 [ 237.008016] [] schedule_timeout+0x22/0xbd [ 237.008016] [] ? dump_stack+0x62/0x62 [ 237.008016] [] __wait_for_common+0xc6/0x142 [ 237.008016] [] ? try_to_wake_up+0x1a8/0x1a8 [ 237.008016] [] ? T.944+0x1de/0x1de [ 237.008016] [] wait_for_completion+0x1f/0x21 [ 237.008016] [] wait_rcu_gp+0x46/0x4f [ 237.008016] [] ? wait_rcu_gp+0x4f/0x4f [ 237.008016] [] synchronize_sched+0x29/0x2b [ 237.008016] [] perf_trace_event_unreg+0x36/0xc8 [ 237.008016] [] perf_trace_destroy+0x36/0x46 [ 237.008016] [] tp_perf_event_destroy+0x9/0xb [ 237.008016] [] __free_event+0x35/0x5d [ 237.008016] [] free_event+0x10d/0x116 [ 237.008016] [] perf_event_release_kernel+0x7d/0x88 [ 237.008016] [] put_event+0xa3/0xac [ 237.008016] [] perf_release+0x10/0x14 [ 237.008016] [] __fput+0xeb/0x1b0 [ 237.008016] [] fput+0x9/0xb [ 237.008016] [] task_work_run+0x83/0x9b [ 237.008016] [] do_notify_resume+0x58/0x5f [ 237.008016] [] int_signal+0x12/0x17 [ 237.008016] ---[ end trace a07d99177587379f ]--- [ 237.008016] IN: VMW: idx=33 state=0 type=0 config=0 samp_per=5e619d890 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, Feb 13, 2014 at 05:13:20PM -0500, Vince Weaver wrote: > On Thu, 13 Feb 2014, Vince Weaver wrote: > > > The plot thickens. The WARN_ON is not caused by the cycles event that we > > open, but it's caused by the NMI Watchdog cycles event. > > The WARN_ON_ONCE at line 1076 in perf_event.c is triggering because > in x86_pmu_enable() is calling x86_pmu_start() for all of the active x86 > events (three plus the NMI watchdog) the NMI watchdog is unexpectedly > not having PERF_HES_STOPPED set (it's hw.state is 0). Cute, that is indeed unexpected. > What's the deal with the PERF_HES_STOPPED name? Is HES an acronym? > Or is it just a male event? Hardware Event State > Also it's not really clear what PERF_HES_ARCH indicates. I ran out of names it seems; its used in the reschedule case where we relocate a stopped event. We save the STOPPED state into ARCH (because we're going to destroy the STOPPED state by stopping everybody), so that we know not to (re)enable the event when its on its new location. The comment in x86_pmu_enable() near where we set ARCH was supposed to communicate this. > Things rapidly get complicated beyond that, as the NMI watchdog is a > kernel-created event bound to the CPU, wheras 2 of the events are x86 hw > events with a breakpoint-event groupleader (and the fact one of the events > has precise set). For this code that _should_ not matter much; they're 3 events and we mapped them into hardware counters. So the precise has to run on cnt0 on Core2, the NMI is simple enough to fit (and we prefer) fixed purpose counters. So there _should_ not be a reshuffle. Although I should probably assert these _should_ thingies. > From the stacktrace it looks like it is the close of a completely > unrelated tracepoint event that triggers this all off, but I'm not > sure why a list_del_event() call of the tracepoint name would > trigger a schedule_timeout() and an ensuing __perf_event_task_sched_in() > which is what eventually triggers the problem. Enable CONFIG_FRAME_POINTER for better stack traces; I suspect the list_del_event() is just random stack garbage. The path that makes sense is: wait_rcu()->__wait_for_common()->schedule_timeout() > Scattering printks around doesn't see to work because a lot of the related > calls are happening in contexts where printks don't really work. > > Anyway I just wanted to summarize my findings as I might not have a chance > to look at this again for a while. For completion I'm including the > backtrace below. Sure, much appreciated. I'll go read up on the event schedule code, its been a while since I stared at that in too much detail. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, 13 Feb 2014, Vince Weaver wrote: > The plot thickens. The WARN_ON is not caused by the cycles event that we > open, but it's caused by the NMI Watchdog cycles event. The WARN_ON_ONCE at line 1076 in perf_event.c is triggering because in x86_pmu_enable() is calling x86_pmu_start() for all of the active x86 events (three plus the NMI watchdog) the NMI watchdog is unexpectedly not having PERF_HES_STOPPED set (it's hw.state is 0). What's the deal with the PERF_HES_STOPPED name? Is HES an acronym? Or is it just a male event? Also it's not really clear what PERF_HES_ARCH indicates. Things rapidly get complicated beyond that, as the NMI watchdog is a kernel-created event bound to the CPU, wheras 2 of the events are x86 hw events with a breakpoint-event groupleader (and the fact one of the events has precise set). >From the stacktrace it looks like it is the close of a completely unrelated tracepoint event that triggers this all off, but I'm not sure why a list_del_event() call of the tracepoint name would trigger a schedule_timeout() and an ensuing __perf_event_task_sched_in() which is what eventually triggers the problem. Scattering printks around doesn't see to work because a lot of the related calls are happening in contexts where printks don't really work. Anyway I just wanted to summarize my findings as I might not have a chance to look at this again for a while. For completion I'm including the backtrace below. Vince [ 49.972034] [ cut here ] [ 49.976014] WARNING: CPU: 1 PID: 2867 at arch/x86/kernel/cpu/perf_event.c:1076 x86_pmu_start+0x38/0x102() [ 49.976014] Modules linked in: cpufreq_userspace cpufreq_stats cpufreq_powersave cpufreq_conservative f71882fg evdev mcs7830 usbnet ohci_pci video acpi_cpufreq processor thermal_sys wmi psmouse serio_raw pcspkr coretemp button ohci_hcd i2c_nforce2 ehci_pci ehci_hcd sg sd_mod usbcore usb_common [ 49.976014] CPU: 1 PID: 2867 Comm: pec_1076_warn Not tainted 3.14.0-rc2+ #7 [ 49.976014] Hardware name: AOpen DE7000/nMCP7ALPx-DE R1.06 Oct.19.2012, BIOS 080015 10/19/2012 [ 49.976014] 817f256d 814e750b [ 49.976014] 8103bf2c 8800ca005820 81012119 88011fc93400 [ 49.976014] 0021 88011b31dc00 88011fc8b940 [ 49.976014] Call Trace: [ 49.976014] [] ? dump_stack+0x41/0x56 [ 49.976014] [] ? warn_slowpath_common+0x79/0x92 [ 49.976014] [] ? x86_pmu_start+0x38/0x102 [ 49.976014] [] ? x86_pmu_start+0x38/0x102 [ 49.976014] [] ? x86_pmu_enable+0x1d3/0x285 [ 49.976014] [] ? perf_event_context_sched_in+0x6d/0x8d [ 49.976014] [] ? __perf_event_task_sched_in+0x21/0x108 [ 49.976014] [] ? idle_balance+0x11a/0x157 [ 49.976014] [] ? finish_task_switch+0x40/0xa5 [ 49.976014] [] ? __schedule+0x46a/0x4bd [ 49.976014] [] ? schedule_timeout+0x1d/0xb4 [ 49.976014] [] ? list_del_event+0x78/0x8f [ 49.976014] [] ? dump_stack+0x56/0x56 [ 49.976014] [] ? __wait_for_common+0xce/0x14a [ 49.976014] [] ? try_to_wake_up+0x19a/0x19a [ 49.976014] [] ? get_tracepoint+0x20/0x53 [ 49.976014] [] ? T.944+0x1c8/0x1c8 [ 49.976014] [] ? wait_rcu_gp+0x3f/0x46 [ 49.976014] [] ? wait_rcu_gp+0x46/0x46 [ 49.976014] [] ? perf_trace_event_unreg+0x2e/0xbd [ 49.976014] [] ? perf_trace_destroy+0x2e/0x3b [ 49.976014] [] ? __free_event+0x2d/0x52 [ 49.976014] [] ? perf_event_release_kernel+0x74/0x7b [ 49.976014] [] ? perf_release+0x10/0x14 [ 49.976014] [] ? __fput+0xdf/0x1a4 [ 49.976014] [] ? task_work_run+0x7f/0x96 [ 49.976014] [] ? int_signal+0x12/0x17 [ 49.976014] ---[ end trace 0b1abb2bc23cc0c5 ]--- [ 49.976014] VMW: idx=33 state=0 type=0 config=0 samp_per=5e5fc1760 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, 13 Feb 2014, Vince Weaver wrote: > On Wed, 12 Feb 2014, Vince Weaver wrote: > > > > It is triggered in this case when you have: > > > > An event group of breakpoint, cycles, branches > > An event of instructions with precise=1 > > A tracepoint > > > > and then you close the tracepoint. > > and for what it's worth, it's the cycles event (type=0 config=0) that's > causing the WARN_ON. The plot thickens. The WARN_ON is not caused by the cycles event that we open, but it's caused by the NMI Watchdog cycles event. I'm not sure if that's useful info or not. Vince -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Wed, 12 Feb 2014, Vince Weaver wrote: > On Tue, 11 Feb 2014, Peter Zijlstra wrote: > > > > I'll see if I can run through the reproduction case by hand. > > I've come up with an even simpler test case with all of the extraneous > settings removed. Included below. > > It is triggered in this case when you have: > > An event group of breakpoint, cycles, branches > An event of instructions with precise=1 > A tracepoint > > and then you close the tracepoint. and for what it's worth, it's the cycles event (type=0 config=0) that's causing the WARN_ON. It looks like I'm going to have to resort to sprinkling the code with printk()s to solve this one. Vince -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Tue, 11 Feb 2014, Peter Zijlstra wrote: > > I'll see if I can run through the reproduction case by hand. I've come up with an even simpler test case with all of the extraneous settings removed. Included below. It is triggered in this case when you have: An event group of breakpoint, cycles, branches An event of instructions with precise=1 A tracepoint and then you close the tracepoint. --- /* log_to_code output from ./warning29.log */ /* by Vince Weaver #include #include #include #include #include static int fd[1024]; static struct perf_event_attr pe[1024]; int perf_event_open(struct perf_event_attr *hw_event_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags) { return syscall(__NR_perf_event_open,hw_event_uptr, pid, cpu, group_fd, flags); } int main(int argc, char **argv) { /* Random Seed was 1392048997 */ /* /proc/sys/kernel/perf_event_max_sample_rate was 10 */ memset(&pe[3],0,sizeof(struct perf_event_attr)); pe[3].type=PERF_TYPE_BREAKPOINT; pe[3].size=72; pe[3].bp_type=HW_BREAKPOINT_R|HW_BREAKPOINT_W; /*3*/ pe[3].bp_len=0x8; fd[3]=perf_event_open(&pe[3], 0, /* current thread */ -1, /* all cpus */ -1, /* group leader */ 0); memset(&pe[23],0,sizeof(struct perf_event_attr)); pe[23].type=PERF_TYPE_HARDWARE; pe[23].config=PERF_COUNT_HW_INSTRUCTIONS; pe[23].precise_ip=1; /* this needs to be set to trigger? */ fd[23]=perf_event_open(&pe[23], 0, /* current thread */ -1, /* all cpus */ -1, /* group leader */ 0 /*0*/ ); memset(&pe[4],0,sizeof(struct perf_event_attr)); pe[4].type=PERF_TYPE_HARDWARE; pe[4].config=PERF_COUNT_HW_CPU_CYCLES; fd[4]=perf_event_open(&pe[4], 0, /* current thread */ -1, /* all cpus */ fd[3], /* 3 is group leader */ 0 /*0*/ ); memset(&pe[5],0,sizeof(struct perf_event_attr)); pe[5].type=PERF_TYPE_TRACEPOINT; pe[5].config=0x1d; /* 29 irq_vectors/error_apic_entry */ /* Config of tracepoint doesn't seem to matter */ fd[5]=perf_event_open(&pe[5], 0, /* current thread */ -1, /* all cpus */ -1, /* New Group Leader */ 0 /*0*/ ); memset(&pe[15],0,sizeof(struct perf_event_attr)); pe[15].type=PERF_TYPE_HARDWARE; pe[15].config=PERF_COUNT_HW_BRANCH_INSTRUCTIONS; fd[15]=perf_event_open(&pe[15], 0, /* current thread */ -1, /* all cpus */ fd[3], /* 3 is group leader */ 0 /*0*/ ); close(fd[5]); return 0; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Mon, Feb 10, 2014 at 04:26:29PM -0500, Vince Weaver wrote: > On Thu, 30 Jan 2014, Dave Jones wrote: > > > I gave Vince's perf_fuzzer a run, hoping to trigger a different perf bug > > that I've been seeing. Instead I hit a different bug. > > I've been seeing that WARN_ON for months but it was hard to reproduce. > After a lot of hassle (and scores or reboots) I managed to narrow this to > a small test case. > > Unfortunately it only reproduces on my core2 machine. My ivb machine > won't trigger it because it doesn't support PERF_COUNT_HW_INSTRUCTIONS > when precise_ip=1; is set. > > This is with 3.14-rc2 > > The test case is attached, the warning message from my machine is below. > > Vince > > [ 47.296031] [ cut here ] > [ 47.300013] WARNING: CPU: 0 PID: 2821 at > arch/x86/kernel/cpu/perf_event.c:1076 x86_pmu_start+0x46/0xee() > [ 47.300013] Modules linked in: cpufreq_userspace cpufreq_stats > cpufreq_powersave cpufreq_conservative f71882fg mcs7830 usbnet evdev ohci_pci > ohci_hcd pcspkr i2c_nforce2 psmouse serio_raw coretemp video wmi button > acpi_cpufreq processor thermal_sys ehci_pci ehci_hcd sg sd_mod usbcore > usb_common > [ 47.300013] CPU: 0 PID: 2821 Comm: out Not tainted 3.14.0-rc2 #2 Yes that one is annoying.. I've seen it several times. Unfortunately I cannot seem to reproduce on either the WSM (which should have INST.RET as PEBS capable) nor my core2 laptop. That said, the laptop seems sick on current kernels, so it might be hiding due to other problems. I'll see if I can run through the reproduction case by hand. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: x86_pmu_start WARN_ON.
On Thu, 30 Jan 2014, Dave Jones wrote: > I gave Vince's perf_fuzzer a run, hoping to trigger a different perf bug > that I've been seeing. Instead I hit a different bug. I've been seeing that WARN_ON for months but it was hard to reproduce. After a lot of hassle (and scores or reboots) I managed to narrow this to a small test case. Unfortunately it only reproduces on my core2 machine. My ivb machine won't trigger it because it doesn't support PERF_COUNT_HW_INSTRUCTIONS when precise_ip=1; is set. This is with 3.14-rc2 The test case is attached, the warning message from my machine is below. Vince [ 47.296031] [ cut here ] [ 47.300013] WARNING: CPU: 0 PID: 2821 at arch/x86/kernel/cpu/perf_event.c:1076 x86_pmu_start+0x46/0xee() [ 47.300013] Modules linked in: cpufreq_userspace cpufreq_stats cpufreq_powersave cpufreq_conservative f71882fg mcs7830 usbnet evdev ohci_pci ohci_hcd pcspkr i2c_nforce2 psmouse serio_raw coretemp video wmi button acpi_cpufreq processor thermal_sys ehci_pci ehci_hcd sg sd_mod usbcore usb_common [ 47.300013] CPU: 0 PID: 2821 Comm: out Not tainted 3.14.0-rc2 #2 [ 47.300013] Hardware name: AOpen DE7000/nMCP7ALPx-DE R1.06 Oct.19.2012, BIOS 080015 10/19/2012 [ 47.300013] 817f25ad 814e746b [ 47.300013] 8103bf1c 880037e62420 81012126 88011fc13400 [ 47.300013] 88011b2b1800 88011fc0b940 0021 [ 47.300013] Call Trace: [ 47.300013] [] ? dump_stack+0x41/0x56 [ 47.300013] [] ? warn_slowpath_common+0x79/0x92 [ 47.300013] [] ? x86_pmu_start+0x46/0xee [ 47.300013] [] ? x86_pmu_start+0x46/0xee [ 47.300013] [] ? x86_pmu_enable+0x1d3/0x285 [ 47.300013] [] ? perf_event_context_sched_in+0x6d/0x8d [ 47.300013] [] ? __perf_event_task_sched_in+0x21/0x108 [ 47.300013] [] ? idle_balance+0x11a/0x157 [ 47.300013] [] ? finish_task_switch+0x40/0xa5 [ 47.300013] [] ? __schedule+0x46a/0x4bd [ 47.300013] [] ? schedule_timeout+0x1d/0xb4 [ 47.300013] [] ? generic_exec_single+0x3f/0x52 [ 47.300013] [] ? perf_reg_value+0x4c/0x4c [ 47.300013] [] ? smp_call_function_single+0xdc/0xf2 [ 47.300013] [] ? dump_stack+0x56/0x56 [ 47.300013] [] ? __wait_for_common+0xce/0x14a [ 47.300013] [] ? try_to_wake_up+0x19a/0x19a [ 47.300013] [] ? get_tracepoint+0x20/0x53 [ 47.300013] [] ? T.944+0x1c8/0x1c8 [ 47.300013] [] ? wait_rcu_gp+0x3f/0x46 [ 47.300013] [] ? wait_rcu_gp+0x46/0x46 [ 47.300013] [] ? perf_trace_event_unreg+0x2e/0xbd [ 47.300013] [] ? perf_trace_destroy+0x2e/0x3b [ 47.300013] [] ? __free_event+0x2d/0x52 [ 47.300013] [] ? perf_event_release_kernel+0x74/0x7b [ 47.300013] [] ? perf_release+0x10/0x14 [ 47.300013] [] ? __fput+0xdf/0x1a4 [ 47.300013] [] ? task_work_run+0x7f/0x96 [ 47.300013] [] ? int_signal+0x12/0x17 [ 47.300013] ---[ end trace 9ccad2f02057baa8 ]--- /* log_to_code output from ./warning29.log */ /* by Vince Weaver #include #include #include #include #include #include #include #include #include #include #include #include int fd[1024]; struct perf_event_attr pe[1024]; int perf_event_open(struct perf_event_attr *hw_event_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags) { return syscall(__NR_perf_event_open,hw_event_uptr, pid, cpu, group_fd, flags); } int main(int argc, char **argv) { int i; for(i=0;i<1024;i++) fd[i]=-1; /* 1 */ /* Random Seed was 1392048997 */ /* 2 */ /* 3 */ /* /proc/sys/kernel/perf_event_max_sample_rate was 10 */ /* 4 */ /* fd = 3 */ memset(&pe[3],0,sizeof(struct perf_event_attr)); pe[3].type=PERF_TYPE_BREAKPOINT; pe[3].size=72; pe[3].config=0xd9ab819b; pe[3].sample_type=0; /* 0 */ pe[3].read_format=PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING|PERF_FORMAT_ID; /* 7 */ pe[3].inherit=1; pe[3].exclude_kernel=1; pe[3].exclude_hv=1; pe[3].exclude_idle=1; pe[3].freq=1; pe[3].inherit_stat=1; pe[3].enable_on_exec=1; pe[3].precise_ip=1; /* constant skid */ pe[3].sample_id_all=1; pe[3].wakeup_events=0; pe[3].bp_type=HW_BREAKPOINT_R|HW_BREAKPOINT_W; /*3*/ pe[3].bp_addr=0x0; pe[3].bp_len=0x8; fd[3]=perf_event_open(&pe[3], getpid(), /* current thread */ -1, /* all cpus */ fd[19], /* 19 is group leader */ PERF_FLAG_FD_NO_GROUP /*1*/ ); /* 5 */ /* fd = 23 */ memset(&pe[23],0,sizeof(struct perf_event_attr)); pe[23].type=PERF_TYPE_HARDWARE; pe[23].config=PERF_COUNT_HW_INSTRUCTIONS; pe[23].sample_type=0; /* 0 */ pe[23].read_format=PERF_FORMAT_TOTAL_TIME_ENABLED; /* 1 */ pe[23].exclude_hv=1; pe[23].exclude_idle=1; pe[23].mmap=1; pe[23].enable_on_exec=1; pe[23].task=1; pe[23].watermark=1; pe[23].precise_ip=1; /* constant skid */ pe[23].mmap_data=1; pe[23].sample_id_all=1; pe[23].exclude_host=1; pe[23].exclude_guest=1; pe[23].exclude_callchain_user=1; pe[23].wakeup_watermark=0; pe[23].bp_type=HW_BREAKPOINT_EMPTY; fd[23]=perf_event_open(&pe[2