Re: x86_pmu_start WARN_ON.

2014-02-25 Thread Vince Weaver
On Mon, 24 Feb 2014, Peter Zijlstra wrote:

> On Fri, Feb 21, 2014 at 03:18:38PM -0500, Vince Weaver wrote:
> > I've applied the patch and have been unable to trigger the warning with 
> > either my testcase or a few hours of fuzzing.
> 
> Yay.
> 
> > My only comment on the patch is it could always use some comments.
> > 
> > The perf_event code is really hard to follow as is, without adding
> > more uncommented special cases.
> 
> Does the below help a bit? Or is there anywhere in particular you want
> more comments?

yes, every little bit helps.

While chasing these fuzzer-related bugs I end up deep in the perf_event
code and many of the routines have no comments at all.  Eventually I have 
to dig out the K+R book to figure out order precendece of ++ prefix 
operators, have at least 2-3 different files open in editors, plus a bunch 
of firefox tabs open to http://lxr.free-electrons.com, and even then I 
misunderstand the code a lot.

Vince

> 
> ---
> Subject: perf, x86: Add a few more comments
> From: Peter Zijlstra 
> Date: Mon Feb 24 12:26:21 CET 2014
> 
> Add a few comments on the ->add(), ->del() and ->*_txn()
> implementation.
> 
> Requested-by: Vince Weaver 
> Signed-off-by: Peter Zijlstra 
> ---
>  arch/x86/kernel/cpu/perf_event.c |   49 
> +++
>  arch/x86/kernel/cpu/perf_event.h |8 +++---
>  2 files changed, 39 insertions(+), 18 deletions(-)
> 
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -892,7 +892,6 @@ static void x86_pmu_enable(struct pmu *p
>* hw_perf_group_sched_in() or x86_pmu_enable()
>*
>* step1: save events moving to new counters
> -  * step2: reprogram moved events into new counters
>*/
>   for (i = 0; i < n_running; i++) {
>   event = cpuc->event_list[i];
> @@ -918,6 +917,9 @@ static void x86_pmu_enable(struct pmu *p
>   x86_pmu_stop(event, PERF_EF_UPDATE);
>   }
>  
> + /*
> +  * step2: reprogram moved events into new counters
> +  */
>   for (i = 0; i < cpuc->n_events; i++) {
>   event = cpuc->event_list[i];
>   hwc = &event->hw;
> @@ -1043,7 +1045,7 @@ static int x86_pmu_add(struct perf_event
>   /*
>* If group events scheduling transaction was started,
>* skip the schedulability test here, it will be performed
> -  * at commit time (->commit_txn) as a whole
> +  * at commit time (->commit_txn) as a whole.
>*/
>   if (cpuc->group_flag & PERF_EVENT_TXN)
>   goto done_collect;
> @@ -1058,6 +1060,10 @@ static int x86_pmu_add(struct perf_event
>   memcpy(cpuc->assign, assign, n*sizeof(int));
>  
>  done_collect:
> + /*
> +  * Commit the collect_events() state. See x86_pmu_del() and
> +  * x86_pmu_*_txn().
> +  */
>   cpuc->n_events = n;
>   cpuc->n_added += n - n0;
>   cpuc->n_txn += n - n0;
> @@ -1183,28 +1189,38 @@ static void x86_pmu_del(struct perf_even
>* If we're called during a txn, we don't need to do anything.
>* The events never got scheduled and ->cancel_txn will truncate
>* the event_list.
> +  *
> +  * XXX assumes any ->del() called during a TXN will only be on
> +  * an event added during that same TXN.
>*/
>   if (cpuc->group_flag & PERF_EVENT_TXN)
>   return;
>  
> + /*
> +  * Not a TXN, therefore cleanup properly.
> +  */
>   x86_pmu_stop(event, PERF_EF_UPDATE);
>  
>   for (i = 0; i < cpuc->n_events; i++) {
> - if (event == cpuc->event_list[i]) {
> -
> - if (i >= cpuc->n_events - cpuc->n_added)
> - --cpuc->n_added;
> + if (event == cpuc->event_list[i])
> + break;
> + }
>  
> - if (x86_pmu.put_event_constraints)
> - x86_pmu.put_event_constraints(cpuc, event);
> + if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without 
> ->add() ? */
> + return;
>  
> - while (++i < cpuc->n_events)
> - cpuc->event_list[i-1] = cpuc->event_list[i];
> + /* If we have a newly added event; make sure to decrease n_added. */
> + if (i >= cpuc->n_events - cpuc->n_added)
> + --cpuc->n_added;
> +
> + if (x86_pmu.put_event_constraints)
> + x86_pmu.put_event_constraints(cpuc, event);
> +
> + /* Delete the array entry. */
> + while (++i < cpuc->n_events)
> + cpuc->event_list[i-1] = cpuc->event_list[i];
> + --cpuc->n_events;
>  
> - --cpuc->n_events;
> - break;
> - }
> - }
>   perf_event_update_userpage(event);
>  }
>  
> @@ -1598,7 +1614,8 @@ static void x86_pmu_cancel_txn(struct pm
>  {
>   __this

Re: x86_pmu_start WARN_ON.

2014-02-24 Thread Peter Zijlstra
On Fri, Feb 21, 2014 at 03:18:38PM -0500, Vince Weaver wrote:
> I've applied the patch and have been unable to trigger the warning with 
> either my testcase or a few hours of fuzzing.

Yay.

> My only comment on the patch is it could always use some comments.
> 
> The perf_event code is really hard to follow as is, without adding
> more uncommented special cases.

Does the below help a bit? Or is there anywhere in particular you want
more comments?

---
Subject: perf, x86: Add a few more comments
From: Peter Zijlstra 
Date: Mon Feb 24 12:26:21 CET 2014

Add a few comments on the ->add(), ->del() and ->*_txn()
implementation.

Requested-by: Vince Weaver 
Signed-off-by: Peter Zijlstra 
---
 arch/x86/kernel/cpu/perf_event.c |   49 +++
 arch/x86/kernel/cpu/perf_event.h |8 +++---
 2 files changed, 39 insertions(+), 18 deletions(-)

--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -892,7 +892,6 @@ static void x86_pmu_enable(struct pmu *p
 * hw_perf_group_sched_in() or x86_pmu_enable()
 *
 * step1: save events moving to new counters
-* step2: reprogram moved events into new counters
 */
for (i = 0; i < n_running; i++) {
event = cpuc->event_list[i];
@@ -918,6 +917,9 @@ static void x86_pmu_enable(struct pmu *p
x86_pmu_stop(event, PERF_EF_UPDATE);
}
 
+   /*
+* step2: reprogram moved events into new counters
+*/
for (i = 0; i < cpuc->n_events; i++) {
event = cpuc->event_list[i];
hwc = &event->hw;
@@ -1043,7 +1045,7 @@ static int x86_pmu_add(struct perf_event
/*
 * If group events scheduling transaction was started,
 * skip the schedulability test here, it will be performed
-* at commit time (->commit_txn) as a whole
+* at commit time (->commit_txn) as a whole.
 */
if (cpuc->group_flag & PERF_EVENT_TXN)
goto done_collect;
@@ -1058,6 +1060,10 @@ static int x86_pmu_add(struct perf_event
memcpy(cpuc->assign, assign, n*sizeof(int));
 
 done_collect:
+   /*
+* Commit the collect_events() state. See x86_pmu_del() and
+* x86_pmu_*_txn().
+*/
cpuc->n_events = n;
cpuc->n_added += n - n0;
cpuc->n_txn += n - n0;
@@ -1183,28 +1189,38 @@ static void x86_pmu_del(struct perf_even
 * If we're called during a txn, we don't need to do anything.
 * The events never got scheduled and ->cancel_txn will truncate
 * the event_list.
+*
+* XXX assumes any ->del() called during a TXN will only be on
+* an event added during that same TXN.
 */
if (cpuc->group_flag & PERF_EVENT_TXN)
return;
 
+   /*
+* Not a TXN, therefore cleanup properly.
+*/
x86_pmu_stop(event, PERF_EF_UPDATE);
 
for (i = 0; i < cpuc->n_events; i++) {
-   if (event == cpuc->event_list[i]) {
-
-   if (i >= cpuc->n_events - cpuc->n_added)
-   --cpuc->n_added;
+   if (event == cpuc->event_list[i])
+   break;
+   }
 
-   if (x86_pmu.put_event_constraints)
-   x86_pmu.put_event_constraints(cpuc, event);
+   if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without 
->add() ? */
+   return;
 
-   while (++i < cpuc->n_events)
-   cpuc->event_list[i-1] = cpuc->event_list[i];
+   /* If we have a newly added event; make sure to decrease n_added. */
+   if (i >= cpuc->n_events - cpuc->n_added)
+   --cpuc->n_added;
+
+   if (x86_pmu.put_event_constraints)
+   x86_pmu.put_event_constraints(cpuc, event);
+
+   /* Delete the array entry. */
+   while (++i < cpuc->n_events)
+   cpuc->event_list[i-1] = cpuc->event_list[i];
+   --cpuc->n_events;
 
-   --cpuc->n_events;
-   break;
-   }
-   }
perf_event_update_userpage(event);
 }
 
@@ -1598,7 +1614,8 @@ static void x86_pmu_cancel_txn(struct pm
 {
__this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
/*
-* Truncate the collected events.
+* Truncate collected array by the number of events added in this
+* transaction. See x86_pmu_add() and x86_pmu_*_txn().
 */
__this_cpu_sub(cpu_hw_events.n_added, 
__this_cpu_read(cpu_hw_events.n_txn));
__this_cpu_sub(cpu_hw_events.n_events, 
__this_cpu_read(cpu_hw_events.n_txn));
@@ -1609,6 +1626,8 @@ static void x86_pmu_cancel_txn(struct pm
  * Commit group events scheduling transaction
  * Perform the group schedulability test as a

Re: x86_pmu_start WARN_ON.

2014-02-21 Thread Vince Weaver
On Fri, 21 Feb 2014, Peter Zijlstra wrote:

> group_sched_in() that fails (for whatever reason), and without x86_pmu
> TXN support (because the leader is !x86_pmu), will corrupt the n_added
> state.
> 
> If this all is correct; the below ought to cure things.

I've applied the patch and have been unable to trigger the warning with 
either my testcase or a few hours of fuzzing.

My only comment on the patch is it could always use some comments.

The perf_event code is really hard to follow as is, without adding
more uncommented special cases.

Vince


> 
> Signed-off-by: Peter Zijlstra 
> ---
>  arch/x86/kernel/cpu/perf_event.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/arch/x86/kernel/cpu/perf_event.c 
> b/arch/x86/kernel/cpu/perf_event.c
> index 895604f2e916..79f9f848bee4 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1192,6 +1192,9 @@ static void x86_pmu_del(struct perf_event *event, int 
> flags)
>   for (i = 0; i < cpuc->n_events; i++) {
>   if (event == cpuc->event_list[i]) {
>  
> + if (i >= cpuc->n_events - cpuc->n_added)
> + --cpuc->n_added;
> +
>   if (x86_pmu.put_event_constraints)
>   x86_pmu.put_event_constraints(cpuc, event);
>  
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-21 Thread Peter Zijlstra
On Thu, Feb 20, 2014 at 07:23:00PM +0100, Peter Zijlstra wrote:
> This is I think the relevant bit:
> 
>pec_1076_warn-2804  [000] d...   147.926153: x86_pmu_disable: 
> x86_pmu_disable
>pec_1076_warn-2804  [000] d...   147.926153: x86_pmu_state: Events: {
>pec_1076_warn-2804  [000] d...   147.926156: x86_pmu_state:   0: state: .R 
> config:  (  (null))
>pec_1076_warn-2804  [000] d...   147.926158: x86_pmu_state:   33: state: 
> AR config: 0 (88011ac99800)
>pec_1076_warn-2804  [000] d...   147.926159: x86_pmu_state: }
>pec_1076_warn-2804  [000] d...   147.926160: x86_pmu_state: n_events: 1, 
> n_added: 0, n_txn: 1
>pec_1076_warn-2804  [000] d...   147.926161: x86_pmu_state: Assignment: {
>pec_1076_warn-2804  [000] d...   147.926162: x86_pmu_state:   0->33 tag: 1 
> config: 0 (88011ac99800)
>pec_1076_warn-2804  [000] d...   147.926163: x86_pmu_state: }
>pec_1076_warn-2804  [000] d...   147.926166: collect_events: Adding event: 
> 1 (880119ec8800)

So we add the insn:p event (fd[23])

At this point we should have:

  n_events = 2, n_added = 1, n_txn = 1

>pec_1076_warn-2804  [000] d...   147.926170: collect_events: Adding event: 
> 0 (8800c9e01800)
>pec_1076_warn-2804  [000] d...   147.926172: collect_events: Adding event: 
> 4 (8800cbab2c00)

We try and add the {BP,cycles,br_insn} group (fd[3], fd[4], fd[15]).
These events are 0:cycles and 4:br_insn, the BP event isn't x86_pmu so
that's not visible.

group_sched_in()
  pmu->start_txn() /* nop - BP pmu */
  event_sched_in()
 event->pmu->add()

So here we should end up with:

0: n_events = 3, n_added = 2, n_txn = 2
4: n_events = 4, n_added = 3, n_txn = 3

But seeing the below state on x86_pmu_enable(), the must have failed,
because the 0 and 4 events aren't there anymore.

Looking at group_sched_in(), since the BP is the leader, its
event_sched_in() must have succeeded, for otherwise we would not have
seen the sibling adds.

But since neither 0 or 4 are in the below state; their event_sched_in()
must have failed; but I don't see why, the complete state: 0,0,1:p,4
fits perfectly fine on a core2.

However, since we try and schedule 4 it means the 0 event must have
succeeded!  Therefore the 4 event must have failed, its failure will
have put group_sched_in() into the fail path, which will call:

  event_sched_out()
event->pmu->del()

on 0 and the BP event.

Now x86_pmu_del() will reduce n_events; but it will not reduce n_added;
giving what we see below:

 n_event = 2, n_added = 2, n_txn = 2

>pec_1076_warn-2804  [000] d...   147.926177: x86_pmu_enable: x86_pmu_enable
>pec_1076_warn-2804  [000] d...   147.926177: x86_pmu_state: Events: {
>pec_1076_warn-2804  [000] d...   147.926179: x86_pmu_state:   0: state: .R 
> config:  (  (null))
>pec_1076_warn-2804  [000] d...   147.926181: x86_pmu_state:   33: state: 
> AR config: 0 (88011ac99800)
>pec_1076_warn-2804  [000] d...   147.926182: x86_pmu_state: }
>pec_1076_warn-2804  [000] d...   147.926184: x86_pmu_state: n_events: 2, 
> n_added: 2, n_txn: 2
>pec_1076_warn-2804  [000] d...   147.926184: x86_pmu_state: Assignment: {
>pec_1076_warn-2804  [000] d...   147.926186: x86_pmu_state:   0->33 tag: 1 
> config: 0 (88011ac99800)
>pec_1076_warn-2804  [000] d...   147.926188: x86_pmu_state:   1->0 tag: 1 
> config: 1 (880119ec8800)
>pec_1076_warn-2804  [000] d...   147.926188: x86_pmu_state: }
>pec_1076_warn-2804  [000] d...   147.926190: x86_pmu_enable: S0: hwc->idx: 
> 33, hwc->last_cpu: 0, hwc->last_tag: 1 hwc->state: 0

So the problem is that x86_pmu_del(), when called from a
group_sched_in() that fails (for whatever reason), and without x86_pmu
TXN support (because the leader is !x86_pmu), will corrupt the n_added
state.

If this all is correct; the below ought to cure things.

Signed-off-by: Peter Zijlstra 
---
 arch/x86/kernel/cpu/perf_event.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 895604f2e916..79f9f848bee4 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1192,6 +1192,9 @@ static void x86_pmu_del(struct perf_event *event, int 
flags)
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event_list[i]) {
 
+   if (i >= cpuc->n_events - cpuc->n_added)
+   --cpuc->n_added;
+
if (x86_pmu.put_event_constraints)
x86_pmu.put_event_constraints(cpuc, event);
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-21 Thread Vince Weaver

and the perf_fuzzer overnight triggered this possibly related warning
in x86_pmu_stop()

I assume it's this code (the line numbers don't match up for some reason).

if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
x86_pmu.disable(event);
cpuc->events[hwc->idx] = NULL;
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
}

so possibly also related to PERF_HES_STOPPED?


[25526.987821] [ cut here ]
[25526.988047] WARNING: CPU: 1 PID: 3849 at 
arch/x86/kernel/cpu/perf_event.c:1221 x86_pmu_stop+0x71/0xa6()
[25526.988047] Modules linked in: cpufreq_userspace cpufreq_stats 
cpufreq_powersave cpufreq_conservative f71882fg mcs7830 usbnet ohci_pci evdev 
video wmi psmouse serio_raw coretemp i2c_nforce2 ohci_hcd acpi_cpufreq 
processor pcspkr thermal_sys button sg ehci_pci ehci_hcd sd_mod usbcore 
usb_common
[25526.988047] CPU: 1 PID: 3849 Comm: perf_fuzzer Tainted: GW
3.14.0-rc3+ #17
[25526.988047] Hardware name: AOpen   DE7000/nMCP7ALPx-DE R1.06 Oct.19.2012, 
BIOS 080015  10/19/2012
[25526.988047]  04c5 88011fc83ca8 81530d11 
04c5
[25526.988047]   88011fc83ce8 8103fce4 
88011fc83d28
[25526.988047]  81012ca0 8800c9be6000 0004 
88011fc8b940
[25526.988047] Call Trace:
[25526.988047][] dump_stack+0x49/0x60
[25526.988047]  [] warn_slowpath_common+0x81/0x9b
[25526.988047]  [] ? x86_pmu_stop+0x71/0xa6
[25526.988047]  [] warn_slowpath_null+0x1a/0x1c
[25526.988047]  [] x86_pmu_stop+0x71/0xa6
[25526.988047]  [] x86_pmu_del+0x3d/0xb8
[25526.988047]  [] event_sched_out+0x92/0x121
[25526.988047]  [] group_sched_in+0xf7/0x147
[25526.988047]  [] ctx_sched_in+0x15e/0x185
[25526.988047]  [] perf_event_sched_in+0x69/0x71
[25526.988047]  [] perf_cpu_hrtimer_handler+0x13a/0x1b0
[25526.988047]  [] __run_hrtimer+0xba/0x145
[25526.988047]  [] ? __perf_install_in_context+0xf0/0xf0
[25526.988047]  [] hrtimer_interrupt+0xd5/0x1c3
[25526.988047]  [] local_apic_timer_interrupt+0x58/0x5d
[25526.988047]  [] smp_trace_apic_timer_interrupt+0x53/0x91
[25526.988047]  [] trace_apic_timer_interrupt+0x6a/0x70
[25526.988047][] ? system_call_fastpath+0x1a/0x1f
[25526.988047] ---[ end trace d5252ca58550eb43 ]---

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Vince Weaver
On Thu, 20 Feb 2014, Vince Weaver wrote:

> On Thu, 20 Feb 2014, Vince Weaver wrote:
> 
> > Might be relevant: check the last_cpu values.  Right before the above
> > it looks like the thread gets moved from CPU 1 to CPU 0
> > (possibly as a result of the long chain started with the
> > close() of the tracepoint event),
> > so the problem NMI watchdog event being enabled is a different one than 
> > the one that was disabled just before.
> 
> so is this a false warning?  If you get scheduled to a new CPU
> and there's an already running CPU-wide event, is that OK?
> 
> Or should x86_pmu_disable() be setting PERF_HES_STOPPED on all events?
> It looks like other architectures are (such as armpmu_stop() ).

an actually I take that back, other architectures don't.

It's just confusing how
perf_pmu_disable()
calls
pmu->pmu_disable()

which on arm is:
armpmu_disable()
which calls:
armpmu->stop()
which is *not* the same as:
armpmu_stop()
but is actually armpmu->pmu->stop()

Urgh.

Vince
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Vince Weaver
On Thu, 20 Feb 2014, Vince Weaver wrote:

> Might be relevant: check the last_cpu values.  Right before the above
> it looks like the thread gets moved from CPU 1 to CPU 0
> (possibly as a result of the long chain started with the
> close() of the tracepoint event),
> so the problem NMI watchdog event being enabled is a different one than 
> the one that was disabled just before.

so is this a false warning?  If you get scheduled to a new CPU
and there's an already running CPU-wide event, is that OK?

Or should x86_pmu_disable() be setting PERF_HES_STOPPED on all events?
It looks like other architectures are (such as armpmu_stop() ).

Vince

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Steven Rostedt
On Thu, 20 Feb 2014 19:15:38 +0100
Peter Zijlstra  wrote:


> I think by using the /debug/tracing/events/ftrace/function event, but
> I'm not actually sure, I've never used it nor did I write the code to do
> it. Jolsa did all that IIRC.
> 
> All I know is that we had some 'fun' bugs around there sometime back.

Note, the function tracer now has a lot better recursion protection than
it use to.

-- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Vince Weaver
On Thu, 20 Feb 2014, Peter Zijlstra wrote:

> On Thu, Feb 20, 2014 at 01:03:16PM -0500, Vince Weaver wrote:
> > attached, it's not very big.
> 
> This is I think the relevant bit:
> 
>pec_1076_warn-2804  [000] d...   147.926153: x86_pmu_disable: 
> x86_pmu_disable
>pec_1076_warn-2804  [000] d...   147.926153: x86_pmu_state: Events: {
>pec_1076_warn-2804  [000] d...   147.926156: x86_pmu_state:   0: state: .R 
> config:  (  (null))
>pec_1076_warn-2804  [000] d...   147.926158: x86_pmu_state:   33: state: 
> AR config: 0 (88011ac99800)
>pec_1076_warn-2804  [000] d...   147.926159: x86_pmu_state: }
>pec_1076_warn-2804  [000] d...   147.926160: x86_pmu_state: n_events: 1, 
> n_added: 0, n_txn: 1
>pec_1076_warn-2804  [000] d...   147.926161: x86_pmu_state: Assignment: {
>pec_1076_warn-2804  [000] d...   147.926162: x86_pmu_state:   0->33 tag: 1 
> config: 0 (88011ac99800)
>pec_1076_warn-2804  [000] d...   147.926163: x86_pmu_state: }
>pec_1076_warn-2804  [000] d...   147.926166: collect_events: Adding event: 
> 1 (880119ec8800)
>pec_1076_warn-2804  [000] d...   147.926170: collect_events: Adding event: 
> 0 (8800c9e01800)
>pec_1076_warn-2804  [000] d...   147.926172: collect_events: Adding event: 
> 4 (8800cbab2c00)
>pec_1076_warn-2804  [000] d...   147.926177: x86_pmu_enable: x86_pmu_enable
>pec_1076_warn-2804  [000] d...   147.926177: x86_pmu_state: Events: {
>pec_1076_warn-2804  [000] d...   147.926179: x86_pmu_state:   0: state: .R 
> config:  (  (null))
>pec_1076_warn-2804  [000] d...   147.926181: x86_pmu_state:   33: state: 
> AR config: 0 (88011ac99800)
>pec_1076_warn-2804  [000] d...   147.926182: x86_pmu_state: }
>pec_1076_warn-2804  [000] d...   147.926184: x86_pmu_state: n_events: 2, 
> n_added: 2, n_txn: 2
>pec_1076_warn-2804  [000] d...   147.926184: x86_pmu_state: Assignment: {
>pec_1076_warn-2804  [000] d...   147.926186: x86_pmu_state:   0->33 tag: 1 
> config: 0 (88011ac99800)
>pec_1076_warn-2804  [000] d...   147.926188: x86_pmu_state:   1->0 tag: 1 
> config: 1 (880119ec8800)
>pec_1076_warn-2804  [000] d...   147.926188: x86_pmu_state: }
>pec_1076_warn-2804  [000] d...   147.926190: x86_pmu_enable: S0: hwc->idx: 
> 33, hwc->last_cpu: 0, hwc->last_tag: 1 hwc->state: 0
>pec_1076_warn-2804  [000] d...   147.926191: x86_pmu_enable: starting: 0
> 
> so it does indeed look like n_added got scrambled; we started out with 1
> event on disable; we've got 2 events on enable, but n_added is also 2,
> which would suggest we had 0 on disable.
> 
> That makes us want to (re)start the NMI counter alright.

Might be relevant: check the last_cpu values.  Right before the above
it looks like the thread gets moved from CPU 1 to CPU 0
(possibly as a result of the long chain started with the
close() of the tracepoint event),
so the problem NMI watchdog event being enabled is a different one than 
the one that was disabled just before.

Vince



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Andi Kleen
On Thu, Feb 20, 2014 at 07:15:38PM +0100, Peter Zijlstra wrote:
> On Thu, Feb 20, 2014 at 09:31:19AM -0800, Andi Kleen wrote:
> > Peter Zijlstra  writes:
> > >
> > > It will; trace_printk() works without -pg, I think you didn't read the
> > > instructions very well.
> > 
> > Ok, you enable and disable it again.  I won't guess why you do that.
> 
> To grow the trace buffers; it starts with just a few pages per cpu; once
> you switch to an actual tracer it allocates a sensible amount.
> 
> You can grow it with another interface; but then I'd have to like
> remember what that was and how big the normal buffers are. Simply
> toggling between tracers is far easier.

I see.

> 
> > > And there's a very good reason not to apply your patch; you can route
> > > the function tracer into perf, guess what happens when perf calls the
> > > function tracer again :-)
> > 
> > How? 
> 
> I think by using the /debug/tracing/events/ftrace/function event, but
> I'm not actually sure, I've never used it nor did I write the code to do
> it. Jolsa did all that IIRC.
> 
> All I know is that we had some 'fun' bugs around there sometime back.

Ok.

I don't think it would be a problem in any case, the ftrace code has recursion
protection.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Peter Zijlstra
On Thu, Feb 20, 2014 at 01:03:16PM -0500, Vince Weaver wrote:
> attached, it's not very big.

This is I think the relevant bit:

   pec_1076_warn-2804  [000] d...   147.926153: x86_pmu_disable: x86_pmu_disable
   pec_1076_warn-2804  [000] d...   147.926153: x86_pmu_state: Events: {
   pec_1076_warn-2804  [000] d...   147.926156: x86_pmu_state:   0: state: .R 
config:  (  (null))
   pec_1076_warn-2804  [000] d...   147.926158: x86_pmu_state:   33: state: AR 
config: 0 (88011ac99800)
   pec_1076_warn-2804  [000] d...   147.926159: x86_pmu_state: }
   pec_1076_warn-2804  [000] d...   147.926160: x86_pmu_state: n_events: 1, 
n_added: 0, n_txn: 1
   pec_1076_warn-2804  [000] d...   147.926161: x86_pmu_state: Assignment: {
   pec_1076_warn-2804  [000] d...   147.926162: x86_pmu_state:   0->33 tag: 1 
config: 0 (88011ac99800)
   pec_1076_warn-2804  [000] d...   147.926163: x86_pmu_state: }
   pec_1076_warn-2804  [000] d...   147.926166: collect_events: Adding event: 1 
(880119ec8800)
   pec_1076_warn-2804  [000] d...   147.926170: collect_events: Adding event: 0 
(8800c9e01800)
   pec_1076_warn-2804  [000] d...   147.926172: collect_events: Adding event: 4 
(8800cbab2c00)
   pec_1076_warn-2804  [000] d...   147.926177: x86_pmu_enable: x86_pmu_enable
   pec_1076_warn-2804  [000] d...   147.926177: x86_pmu_state: Events: {
   pec_1076_warn-2804  [000] d...   147.926179: x86_pmu_state:   0: state: .R 
config:  (  (null))
   pec_1076_warn-2804  [000] d...   147.926181: x86_pmu_state:   33: state: AR 
config: 0 (88011ac99800)
   pec_1076_warn-2804  [000] d...   147.926182: x86_pmu_state: }
   pec_1076_warn-2804  [000] d...   147.926184: x86_pmu_state: n_events: 2, 
n_added: 2, n_txn: 2
   pec_1076_warn-2804  [000] d...   147.926184: x86_pmu_state: Assignment: {
   pec_1076_warn-2804  [000] d...   147.926186: x86_pmu_state:   0->33 tag: 1 
config: 0 (88011ac99800)
   pec_1076_warn-2804  [000] d...   147.926188: x86_pmu_state:   1->0 tag: 1 
config: 1 (880119ec8800)
   pec_1076_warn-2804  [000] d...   147.926188: x86_pmu_state: }
   pec_1076_warn-2804  [000] d...   147.926190: x86_pmu_enable: S0: hwc->idx: 
33, hwc->last_cpu: 0, hwc->last_tag: 1 hwc->state: 0
   pec_1076_warn-2804  [000] d...   147.926191: x86_pmu_enable: starting: 0

so it does indeed look like n_added got scrambled; we started out with 1
event on disable; we've got 2 events on enable, but n_added is also 2,
which would suggest we had 0 on disable.

That makes us want to (re)start the NMI counter alright.

I've got to go run errands for a bit; hopefully I can find a bit of time
later tonight to look this over.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Peter Zijlstra
On Thu, Feb 20, 2014 at 12:46:12PM -0500, Steven Rostedt wrote:
> On Thu, 20 Feb 2014 12:43:51 -0500
> Steven Rostedt  wrote:
> 
> > As a disable_trace_on_warning is more of a modification to the kernel,
> > I'm leaning to adding a /proc/sys/kernel/ftrace_disable_on_warning
> > file. This keeps it in line with ftrace_dump_on_oops, which is the most
> > similar feature.
> 
> Nevermind. There's a "traceoff_on_warning" already there that does
> exactly this :-/

Bugger; I so missed that looking for it earlier today.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Peter Zijlstra
On Thu, Feb 20, 2014 at 09:31:19AM -0800, Andi Kleen wrote:
> Peter Zijlstra  writes:
> >
> > It will; trace_printk() works without -pg, I think you didn't read the
> > instructions very well.
> 
> Ok, you enable and disable it again.  I won't guess why you do that.

To grow the trace buffers; it starts with just a few pages per cpu; once
you switch to an actual tracer it allocates a sensible amount.

You can grow it with another interface; but then I'd have to like
remember what that was and how big the normal buffers are. Simply
toggling between tracers is far easier.

> > And there's a very good reason not to apply your patch; you can route
> > the function tracer into perf, guess what happens when perf calls the
> > function tracer again :-)
> 
> How? 

I think by using the /debug/tracing/events/ftrace/function event, but
I'm not actually sure, I've never used it nor did I write the code to do
it. Jolsa did all that IIRC.

All I know is that we had some 'fun' bugs around there sometime back.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Vince Weaver
On Thu, 20 Feb 2014, Peter Zijlstra wrote:

> On Wed, Feb 19, 2014 at 05:34:49PM -0500, Vince Weaver wrote:
> > So where would the NMI counter event get disabled?  Would it never get 
> > disabled, just because it's always running and always gets the same fixed 
> > slot?  Why isn't this a problem all the time, not just with corner cases?
> 
> Well it could get another counter assignment I suppose; there's no
> guarantee it always ends up in the fixed counter although that's rather
> likely.
> 
> > Is somehow n_added getting confused?
> 
> I'd expect worse than a warning if that happened, but who knows.
> 
> You could try with the below; make sure you've got CONFIG_FUNCTION_TRACER=y.

> # cat /debug/tracing/trace | bzip2 -9 > ~/trace.bz2
> 
> And send that (and look at it of course).

attached, it's not very big.  

Vince

trace.bz2
Description: Binary data


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Steven Rostedt
On Thu, 20 Feb 2014 12:43:51 -0500
Steven Rostedt  wrote:

> As a disable_trace_on_warning is more of a modification to the kernel,
> I'm leaning to adding a /proc/sys/kernel/ftrace_disable_on_warning
> file. This keeps it in line with ftrace_dump_on_oops, which is the most
> similar feature.

Nevermind. There's a "traceoff_on_warning" already there that does
exactly this :-/

-- Steve

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Steven Rostedt
On Thu, 20 Feb 2014 18:00:18 +0100
Peter Zijlstra  wrote:

> On Thu, Feb 20, 2014 at 11:26:00AM -0500, Steven Rostedt wrote:
> > On Thu, 20 Feb 2014 11:08:30 +0100
> > Peter Zijlstra  wrote:
> > 
> > > @rostedt: WTF is disable_trace_on_warning a boot option only?
> > 
> > Laziness.
> > 
> > 
> > I'll add a sysctl for it in 3.15.
> 
> /debug/tracing/options/ was where I was looking first.
> 
> Its a bit weird to have half the options in options/ and the other half
> as sysctl in /proc/sys/kernel/

Yeah, that is somewhat strange.  The /proc/sys/kernel/ ftrace options
is somewhat historical. Things that were more about how ftrace works
outside of the tracing word exists there. For example,
ftrace_dump_on_oops is there, because it is about modifying the way the
kernel works on crash and not how ftrace works.

This is something similar, it's about how the kernel works, not how
ftrace works. I try to have the debugfs options be ways to modify how
ftrace works, like formatting, what it prints, etc. But things that
modify the way the kernel work, I like to keep in /proc/sys/kernel.
That is, ftrace_dump_on_oops is what happens on kernel crash,
stack_tracer_enabled is something on the boarder. That is, it should
probably have been in the tracing facility, as it is similar to
something like the irqsoff tracer. But as it wasn't to be a tracer in
itself, it was added to proc.

The ftrace_enabled, is a big on off switch to enable or disable ALL
function tracing. Even for perf and friends. I added a trace option
"function-trace" that will just disable function tracing for ftrace and
not affect other users of function tracing.

As a disable_trace_on_warning is more of a modification to the kernel,
I'm leaning to adding a /proc/sys/kernel/ftrace_disable_on_warning
file. This keeps it in line with ftrace_dump_on_oops, which is the most
similar feature.

-- Steve


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Andi Kleen
Peter Zijlstra  writes:
>
> It will; trace_printk() works without -pg, I think you didn't read the
> instructions very well.

Ok, you enable and disable it again.  I won't guess why you do that.

>
> And there's a very good reason not to apply your patch; you can route
> the function tracer into perf, guess what happens when perf calls the
> function tracer again :-)

How? 

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Peter Zijlstra
On Thu, Feb 20, 2014 at 11:26:00AM -0500, Steven Rostedt wrote:
> On Thu, 20 Feb 2014 11:08:30 +0100
> Peter Zijlstra  wrote:
> 
> > @rostedt: WTF is disable_trace_on_warning a boot option only?
> 
> Laziness.
> 
> 
> I'll add a sysctl for it in 3.15.

/debug/tracing/options/ was where I was looking first.

Its a bit weird to have half the options in options/ and the other half
as sysctl in /proc/sys/kernel/


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Steven Rostedt
On Thu, 20 Feb 2014 11:08:30 +0100
Peter Zijlstra  wrote:

> @rostedt: WTF is disable_trace_on_warning a boot option only?

Laziness.


I'll add a sysctl for it in 3.15.


-- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Andi Kleen
Peter Zijlstra  writes:

> On Wed, Feb 19, 2014 at 05:34:49PM -0500, Vince Weaver wrote:
>> So where would the NMI counter event get disabled?  Would it never get 
>> disabled, just because it's always running and always gets the same fixed 
>> slot?  Why isn't this a problem all the time, not just with corner cases?
>
> Well it could get another counter assignment I suppose; there's no
> guarantee it always ends up in the fixed counter although that's rather
> likely.
>
>> Is somehow n_added getting confused?
>
> I'd expect worse than a warning if that happened, but who knows.
>
> You could try with the below; make sure you've got CONFIG_FUNCTION_TRACER=y.
>
> Then (assuming debugfs is mounted in /debug ; most modern distros have
> it at /sys/kernel/debug but they're wrong!):
>
> # echo 0 > /debug/tracing/tracing_on
> # echo function > /debug/tracing/current_tracer
> # echo nop > /debug/tracing/current_tracer
> # echo 0 > /debug/tracing/trace
> # echo 1 > /debug/tracing/tracing_on
>
> And make it go *BOOM*, then:
>
> # cat /debug/tracing/trace | bzip2 -9 > ~/trace.bz2

This won't work unless you apply this patch

http://lkml.org/lkml/2013/10/20/94

-Andi


-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Peter Zijlstra
On Thu, Feb 20, 2014 at 07:47:23AM -0800, Andi Kleen wrote:
> Peter Zijlstra  writes:
> 
> > On Wed, Feb 19, 2014 at 05:34:49PM -0500, Vince Weaver wrote:
> >> So where would the NMI counter event get disabled?  Would it never get 
> >> disabled, just because it's always running and always gets the same fixed 
> >> slot?  Why isn't this a problem all the time, not just with corner cases?
> >
> > Well it could get another counter assignment I suppose; there's no
> > guarantee it always ends up in the fixed counter although that's rather
> > likely.
> >
> >> Is somehow n_added getting confused?
> >
> > I'd expect worse than a warning if that happened, but who knows.
> >
> > You could try with the below; make sure you've got CONFIG_FUNCTION_TRACER=y.
> >
> > Then (assuming debugfs is mounted in /debug ; most modern distros have
> > it at /sys/kernel/debug but they're wrong!):
> >
> > # echo 0 > /debug/tracing/tracing_on
> > # echo function > /debug/tracing/current_tracer
> > # echo nop > /debug/tracing/current_tracer
> > # echo 0 > /debug/tracing/trace
> > # echo 1 > /debug/tracing/tracing_on
> >
> > And make it go *BOOM*, then:
> >
> > # cat /debug/tracing/trace | bzip2 -9 > ~/trace.bz2
> 
> This won't work unless you apply this patch
> 
> http://lkml.org/lkml/2013/10/20/94

It will; trace_printk() works without -pg, I think you didn't read the
instructions very well.

And there's a very good reason not to apply your patch; you can route
the function tracer into perf, guess what happens when perf calls the
function tracer again :-)

But yes; you can use it if you don't do that. But I didn't want the
function trace, so who cares.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-20 Thread Peter Zijlstra
On Wed, Feb 19, 2014 at 05:34:49PM -0500, Vince Weaver wrote:
> So where would the NMI counter event get disabled?  Would it never get 
> disabled, just because it's always running and always gets the same fixed 
> slot?  Why isn't this a problem all the time, not just with corner cases?

Well it could get another counter assignment I suppose; there's no
guarantee it always ends up in the fixed counter although that's rather
likely.

> Is somehow n_added getting confused?

I'd expect worse than a warning if that happened, but who knows.

You could try with the below; make sure you've got CONFIG_FUNCTION_TRACER=y.

Then (assuming debugfs is mounted in /debug ; most modern distros have
it at /sys/kernel/debug but they're wrong!):

# echo 0 > /debug/tracing/tracing_on
# echo function > /debug/tracing/current_tracer
# echo nop > /debug/tracing/current_tracer
# echo 0 > /debug/tracing/trace
# echo 1 > /debug/tracing/tracing_on

And make it go *BOOM*, then:

# cat /debug/tracing/trace | bzip2 -9 > ~/trace.bz2

And send that (and look at it of course).

@rostedt: WTF is disable_trace_on_warning a boot option only?

---
 arch/x86/kernel/cpu/perf_event.c | 65 +++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 895604f2e916..d3edf301deb8 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -509,6 +509,42 @@ void x86_pmu_disable_all(void)
}
 }
 
+static void x86_pmu_state(struct pmu *pmu)
+{
+   struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+   int i;
+
+   trace_printk("Events: {\n");
+   for (i = 0; i < X86_PMC_IDX_MAX; i++) {
+   struct perf_event *event = cpuc->events[i];
+   int active = test_bit(i, cpuc->active_mask);
+   int running = test_bit(i, cpuc->running);
+
+   if (active || running) {
+   trace_printk("  %d: state: %c%c config: %lx (%p)\n",
+   i, active ? 'A' : '.',
+   running ? 'R' : '.',
+   event ? event->attr.config : -1L,
+   event);
+   }
+   }
+   trace_printk("}\n");
+
+
+   trace_printk("n_events: %d, n_added: %d, n_txn: %d\n",
+   cpuc->n_events, cpuc->n_added, cpuc->n_txn);
+   trace_printk("Assignment: {\n");
+   for (i = 0; i < cpuc->n_events; i++) {
+   struct perf_event *event = cpuc->event_list[i];
+
+   trace_printk("  %d->%d tag: %lx config: %lx (%p)\n",
+   i, cpuc->assign[i], cpuc->tags[i],
+   event ? event->attr.config : -1L,
+   event);
+   }
+   trace_printk("}\n");
+}
+
 static void x86_pmu_disable(struct pmu *pmu)
 {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -524,6 +560,9 @@ static void x86_pmu_disable(struct pmu *pmu)
barrier();
 
x86_pmu.disable_all();
+
+   trace_printk("x86_pmu_disable\n");
+   x86_pmu_state(pmu);
 }
 
 void x86_pmu_enable_all(int added)
@@ -820,6 +859,7 @@ static int collect_events(struct cpu_hw_events *cpuc, 
struct perf_event *leader,
return -EINVAL;
cpuc->event_list[n] = leader;
n++;
+   trace_printk("Adding event: %lx (%p)\n", leader->attr.config, 
leader);
}
if (!dogrp)
return n;
@@ -834,6 +874,7 @@ static int collect_events(struct cpu_hw_events *cpuc, 
struct perf_event *leader,
 
cpuc->event_list[n] = event;
n++;
+   trace_printk("Adding event: %lx (%p)\n", event->attr.config, 
event);
}
return n;
 }
@@ -885,6 +926,9 @@ static void x86_pmu_enable(struct pmu *pmu)
if (cpuc->enabled)
return;
 
+   trace_printk("x86_pmu_enable\n");
+   x86_pmu_state(pmu);
+
if (cpuc->n_added) {
int n_running = cpuc->n_events - cpuc->n_added;
/*
@@ -898,6 +942,11 @@ static void x86_pmu_enable(struct pmu *pmu)
event = cpuc->event_list[i];
hwc = &event->hw;
 
+   trace_printk("K%d: hwc->idx: %d, hwc->last_cpu: %d,"
+" hwc->last_tag: %lx hwc->state: %x\n",
+i, hwc->idx, hwc->last_cpu,
+hwc->last_tag, hwc->state);
+
/*
 * we can avoid reprogramming counter if:
 * - assigned same counter as last time
@@ -915,6 +964,8 @@ static void x86_pmu_enable(struct pmu *pmu)
if (hwc->state & PERF_HES_STOPPED)
hwc->state |= PERF_HES_ARCH;

Re: x86_pmu_start WARN_ON.

2014-02-19 Thread Vince Weaver
On Wed, 19 Feb 2014, Peter Zijlstra wrote:

> So when we add a new event (or more) we compute a mapping from event to
> counter. Then we disable all (pre existing) events that moved to a new
> location, then we enable all events (insert HES_ARCH) that were running
> but got relocated and the new events.
> 
> Of course the code is horrible, but I think the above is what it does.

The code is a pain, with all of the various ctx types, and the nested 
calls to perf_pmu_enable/perf_pmu_disable in particular.


So I've hacked the code so that it shows the last location where 
hwc->state was cleared for an event (i.e., where it was last enabled).

The NMI counter event was enabled in _perf_install_in_context (presumably 
at creation time) and as far as I can tell never disabled.

Then when x86_pmu_enable/x86_pmu_start gets called at the end of
_perf_event_context_sched_in() it is sad because the NMI counter event is 
not disabled.

So where would the NMI counter event get disabled?  Would it never get 
disabled, just because it's always running and always gets the same fixed 
slot?  Why isn't this a problem all the time, not just with corner cases?

Is somehow n_added getting confused?

Vince

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-19 Thread Peter Zijlstra
On Tue, Feb 18, 2014 at 05:20:57PM -0500, Vince Weaver wrote:
> On Tue, 18 Feb 2014, Vince Weaver wrote:
> 
> > On Mon, 17 Feb 2014, Peter Zijlstra wrote:
> > 
> > > Enable CONFIG_FRAME_POINTER for better stack traces; I suspect the
> > > list_del_event() is just random stack garbage. The path that makes sense
> > > is:
> > >   wait_rcu()->__wait_for_common()->schedule_timeout()
> > 
> > Here's an updated stack trace on 3.14-rc3 with CONFIG_FRAME_POINTER 
> > enabled, in case it's helpful:
> 
> Still chasing this, although all I can add are these debug messages:
> 
> [  140.812003] PROBLEM: n_events=2 n_added=2 VMW: idx=33 state=f00 type=0 
> config=0 samp_per=5e6069eb0
> [  140.812003] ALL: VMW: Num=0 idx=33 state=f00 type=0 config=0 
> samp_per=5e6069eb0
> [  140.812003] ALL: VMW: Num=1 idx=0 state=3 type=0 config=1 samp_per=0
> 
> So when the WARN gets triggered there only only two events in the event
> list, the NMI watchdog which has already been enabled somehow (that f00
> I stuck in, pmu_start sets it to f00 instead of 00 to make sure it wasn't
> something stomping on memory) and the precise instructions event.
> 
> I still have a hard time following what all the schedule in code is doing.

Yes, I got it once, then promptly forgot it. It all became the thing it
is because AMD Fam15 had some horrible constraints.

So in general it tries to map events to counters in order of decreasing
constraints (so it starts with the most constrained events).

It all gets a bit funny due to overlapping constraints; see commit
bc1738f6ee830 for a little blurb on what the overlap thing is about.


So when we add a new event (or more) we compute a mapping from event to
counter. Then we disable all (pre existing) events that moved to a new
location, then we enable all events (insert HES_ARCH) that were running
but got relocated and the new events.

Of course the code is horrible, but I think the above is what it does.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-18 Thread Vince Weaver
On Tue, 18 Feb 2014, Vince Weaver wrote:

> On Mon, 17 Feb 2014, Peter Zijlstra wrote:
> 
> > Enable CONFIG_FRAME_POINTER for better stack traces; I suspect the
> > list_del_event() is just random stack garbage. The path that makes sense
> > is:
> >   wait_rcu()->__wait_for_common()->schedule_timeout()
> 
> Here's an updated stack trace on 3.14-rc3 with CONFIG_FRAME_POINTER 
> enabled, in case it's helpful:

Still chasing this, although all I can add are these debug messages:

[  140.812003] PROBLEM: n_events=2 n_added=2 VMW: idx=33 state=f00 type=0 
config=0 samp_per=5e6069eb0
[  140.812003] ALL: VMW: Num=0 idx=33 state=f00 type=0 config=0 
samp_per=5e6069eb0
[  140.812003] ALL: VMW: Num=1 idx=0 state=3 type=0 config=1 samp_per=0

So when the WARN gets triggered there only only two events in the event
list, the NMI watchdog which has already been enabled somehow (that f00
I stuck in, pmu_start sets it to f00 instead of 00 to make sure it wasn't
something stomping on memory) and the precise instructions event.

I still have a hard time following what all the schedule in code is doing.

Vince
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-18 Thread Vince Weaver
On Mon, 17 Feb 2014, Peter Zijlstra wrote:

> Enable CONFIG_FRAME_POINTER for better stack traces; I suspect the
> list_del_event() is just random stack garbage. The path that makes sense
> is:
>   wait_rcu()->__wait_for_common()->schedule_timeout()

Here's an updated stack trace on 3.14-rc3 with CONFIG_FRAME_POINTER 
enabled, in case it's helpful:

[  237.004035] [ cut here ]
[  237.008016] WARNING: CPU: 1 PID: 2791 at 
arch/x86/kernel/cpu/perf_event.c:1076 x86_pmu_start+0x41/0x113()
[  237.008016] Modules linked in: cpufreq_userspace cpufreq_stats 
cpufreq_powersave cpufreq_conservative f71882fg mcs7830 usbnet evdev wmi 
acpi_cpufreq video coretemp psmouse processor thermal_sys button ohci_pci 
ohci_hcd serio_raw pcspkr i2c_nforce2 sg ehci_pci ehci_hcd sd_mod usbcore 
usb_common
[  237.008016] CPU: 1 PID: 2791 Comm: pec_1076_warn Not tainted 3.14.0-rc3+ #10
[  237.008016] Hardware name: AOpen   DE7000/nMCP7ALPx-DE R1.06 Oct.19.2012, 
BIOS 080015  10/19/2012
[  237.008016]  0434 8800cae6b9d8 8150e4ff 
0434
[  237.008016]   8800cae6ba18 8103d64e 
8800cae6ba48
[  237.008016]  81012b10 0021 88011b31dc00 
88011fc8b940
[  237.008016] Call Trace:
[  237.008016]  [] dump_stack+0x49/0x62
[  237.008016]  [] warn_slowpath_common+0x7c/0x96
[  237.008016]  [] ? x86_pmu_start+0x41/0x113
[  237.008016]  [] warn_slowpath_null+0x15/0x17
[  237.008016]  [] x86_pmu_start+0x41/0x113
[  237.008016]  [] x86_pmu_enable+0x1d8/0x27d
[  237.008016]  [] perf_pmu_enable+0x21/0x23
[  237.008016]  [] perf_event_context_sched_in+0x79/0x9a
[  237.008016]  [] __perf_event_task_sched_in+0x26/0x10f
[  237.008016]  [] ? idle_balance+0x125/0x161
[  237.008016]  [] finish_task_switch+0x41/0xa8
[  237.008016]  [] __schedule+0x48f/0x4e3
[  237.008016]  [] ? __hrtimer_start_range_ns+0x2e8/0x2fa
[  237.008016]  [] schedule+0x6e/0x70
[  237.008016]  [] schedule_timeout+0x22/0xbd
[  237.008016]  [] ? dump_stack+0x62/0x62
[  237.008016]  [] __wait_for_common+0xc6/0x142
[  237.008016]  [] ? try_to_wake_up+0x1a8/0x1a8
[  237.008016]  [] ? T.944+0x1de/0x1de
[  237.008016]  [] wait_for_completion+0x1f/0x21
[  237.008016]  [] wait_rcu_gp+0x46/0x4f
[  237.008016]  [] ? wait_rcu_gp+0x4f/0x4f
[  237.008016]  [] synchronize_sched+0x29/0x2b
[  237.008016]  [] perf_trace_event_unreg+0x36/0xc8
[  237.008016]  [] perf_trace_destroy+0x36/0x46
[  237.008016]  [] tp_perf_event_destroy+0x9/0xb
[  237.008016]  [] __free_event+0x35/0x5d
[  237.008016]  [] free_event+0x10d/0x116
[  237.008016]  [] perf_event_release_kernel+0x7d/0x88
[  237.008016]  [] put_event+0xa3/0xac
[  237.008016]  [] perf_release+0x10/0x14
[  237.008016]  [] __fput+0xeb/0x1b0
[  237.008016]  [] fput+0x9/0xb
[  237.008016]  [] task_work_run+0x83/0x9b
[  237.008016]  [] do_notify_resume+0x58/0x5f
[  237.008016]  [] int_signal+0x12/0x17
[  237.008016] ---[ end trace a07d99177587379f ]---
[  237.008016] IN: VMW: idx=33 state=0 type=0 config=0 samp_per=5e619d890

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-17 Thread Peter Zijlstra
On Thu, Feb 13, 2014 at 05:13:20PM -0500, Vince Weaver wrote:
> On Thu, 13 Feb 2014, Vince Weaver wrote:
> 
> > The plot thickens.  The WARN_ON is not caused by the cycles event that we 
> > open, but it's caused by the NMI Watchdog cycles event.
> 
> The WARN_ON_ONCE at line 1076 in perf_event.c is triggering because
> in x86_pmu_enable() is calling x86_pmu_start() for all of the active x86 
> events (three plus the NMI watchdog) the NMI watchdog is unexpectedly
> not having PERF_HES_STOPPED set (it's hw.state is 0).

Cute, that is indeed unexpected.

> What's the deal with the PERF_HES_STOPPED name?  Is HES an acronym?
> Or is it just a male event?  

Hardware Event State

> Also it's not really clear what PERF_HES_ARCH indicates.

I ran out of names it seems; its used in the reschedule case where we
relocate a stopped event. We save the STOPPED state into ARCH (because
we're going to destroy the STOPPED state by stopping everybody), so that
we know not to (re)enable the event when its on its new location.

The comment in x86_pmu_enable() near where we set ARCH was supposed to
communicate this.

> Things rapidly get complicated beyond that, as the NMI watchdog is a 
> kernel-created event bound to the CPU, wheras 2 of the events are x86 hw 
> events with a breakpoint-event groupleader (and the fact one of the events 
> has precise set).

For this code that _should_ not matter much; they're 3 events and we
mapped them into hardware counters.

So the precise has to run on cnt0 on Core2, the NMI is simple enough to
fit (and we prefer) fixed purpose counters. So there _should_ not be a
reshuffle.

Although I should probably assert these _should_ thingies.

> From the stacktrace it looks like it is the close of a completely 
> unrelated tracepoint event that triggers this all off, but I'm not
> sure why a list_del_event() call of the tracepoint name would
> trigger a schedule_timeout() and an ensuing __perf_event_task_sched_in()
> which is what eventually triggers the problem.

Enable CONFIG_FRAME_POINTER for better stack traces; I suspect the
list_del_event() is just random stack garbage. The path that makes sense
is:
  wait_rcu()->__wait_for_common()->schedule_timeout()

> Scattering printks around doesn't see to work because a lot of the related 
> calls are happening in contexts where printks don't really work.
> 
> Anyway I just wanted to summarize my findings as I might not have a chance 
> to look at this again for a while.  For completion I'm including the 
> backtrace below.

Sure, much appreciated. I'll go read up on the event schedule code, its
been a while since I stared at that in too much detail.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-13 Thread Vince Weaver
On Thu, 13 Feb 2014, Vince Weaver wrote:

> The plot thickens.  The WARN_ON is not caused by the cycles event that we 
> open, but it's caused by the NMI Watchdog cycles event.

The WARN_ON_ONCE at line 1076 in perf_event.c is triggering because
in x86_pmu_enable() is calling x86_pmu_start() for all of the active x86 
events (three plus the NMI watchdog) the NMI watchdog is unexpectedly
not having PERF_HES_STOPPED set (it's hw.state is 0).

What's the deal with the PERF_HES_STOPPED name?  Is HES an acronym?
Or is it just a male event?  
Also it's not really clear what PERF_HES_ARCH indicates.

Things rapidly get complicated beyond that, as the NMI watchdog is a 
kernel-created event bound to the CPU, wheras 2 of the events are x86 hw 
events with a breakpoint-event groupleader (and the fact one of the events 
has precise set).

>From the stacktrace it looks like it is the close of a completely 
unrelated tracepoint event that triggers this all off, but I'm not
sure why a list_del_event() call of the tracepoint name would
trigger a schedule_timeout() and an ensuing __perf_event_task_sched_in()
which is what eventually triggers the problem.

Scattering printks around doesn't see to work because a lot of the related 
calls are happening in contexts where printks don't really work.

Anyway I just wanted to summarize my findings as I might not have a chance 
to look at this again for a while.  For completion I'm including the 
backtrace below.

Vince

[   49.972034] [ cut here ]
[   49.976014] WARNING: CPU: 1 PID: 2867 at 
arch/x86/kernel/cpu/perf_event.c:1076 x86_pmu_start+0x38/0x102()
[   49.976014] Modules linked in: cpufreq_userspace cpufreq_stats 
cpufreq_powersave cpufreq_conservative f71882fg evdev mcs7830 usbnet ohci_pci 
video acpi_cpufreq processor thermal_sys wmi psmouse serio_raw pcspkr coretemp 
button ohci_hcd i2c_nforce2 ehci_pci ehci_hcd sg sd_mod usbcore usb_common
[   49.976014] CPU: 1 PID: 2867 Comm: pec_1076_warn Not tainted 3.14.0-rc2+ #7
[   49.976014] Hardware name: AOpen   DE7000/nMCP7ALPx-DE R1.06 Oct.19.2012, 
BIOS 080015  10/19/2012
[   49.976014]   817f256d 814e750b 

[   49.976014]  8103bf2c 8800ca005820 81012119 
88011fc93400
[   49.976014]  0021 88011b31dc00 88011fc8b940 

[   49.976014] Call Trace:
[   49.976014]  [] ? dump_stack+0x41/0x56
[   49.976014]  [] ? warn_slowpath_common+0x79/0x92
[   49.976014]  [] ? x86_pmu_start+0x38/0x102
[   49.976014]  [] ? x86_pmu_start+0x38/0x102
[   49.976014]  [] ? x86_pmu_enable+0x1d3/0x285
[   49.976014]  [] ? perf_event_context_sched_in+0x6d/0x8d
[   49.976014]  [] ? __perf_event_task_sched_in+0x21/0x108
[   49.976014]  [] ? idle_balance+0x11a/0x157
[   49.976014]  [] ? finish_task_switch+0x40/0xa5
[   49.976014]  [] ? __schedule+0x46a/0x4bd
[   49.976014]  [] ? schedule_timeout+0x1d/0xb4
[   49.976014]  [] ? list_del_event+0x78/0x8f
[   49.976014]  [] ? dump_stack+0x56/0x56
[   49.976014]  [] ? __wait_for_common+0xce/0x14a
[   49.976014]  [] ? try_to_wake_up+0x19a/0x19a
[   49.976014]  [] ? get_tracepoint+0x20/0x53
[   49.976014]  [] ? T.944+0x1c8/0x1c8
[   49.976014]  [] ? wait_rcu_gp+0x3f/0x46
[   49.976014]  [] ? wait_rcu_gp+0x46/0x46
[   49.976014]  [] ? perf_trace_event_unreg+0x2e/0xbd
[   49.976014]  [] ? perf_trace_destroy+0x2e/0x3b
[   49.976014]  [] ? __free_event+0x2d/0x52
[   49.976014]  [] ? perf_event_release_kernel+0x74/0x7b
[   49.976014]  [] ? perf_release+0x10/0x14
[   49.976014]  [] ? __fput+0xdf/0x1a4
[   49.976014]  [] ? task_work_run+0x7f/0x96
[   49.976014]  [] ? int_signal+0x12/0x17
[   49.976014] ---[ end trace 0b1abb2bc23cc0c5 ]---
[   49.976014] VMW: idx=33 state=0 type=0 config=0 samp_per=5e5fc1760

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-13 Thread Vince Weaver
On Thu, 13 Feb 2014, Vince Weaver wrote:
> On Wed, 12 Feb 2014, Vince Weaver wrote:
> > 
> > It is triggered in this case when you have:
> > 
> >   An event group of breakpoint, cycles, branches
> >   An event of instructions with precise=1
> >   A tracepoint
> > 
> > and then you close the tracepoint.
> 
> and for what it's worth, it's the cycles event (type=0 config=0) that's 
> causing the WARN_ON.

The plot thickens.  The WARN_ON is not caused by the cycles event that we 
open, but it's caused by the NMI Watchdog cycles event.

I'm not sure if that's useful info or not.

Vince
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-13 Thread Vince Weaver
On Wed, 12 Feb 2014, Vince Weaver wrote:

> On Tue, 11 Feb 2014, Peter Zijlstra wrote:
> > 
> > I'll see if I can run through the reproduction case by hand.
> 
> I've come up with an even simpler test case with all of the extraneous 
> settings removed.  Included below.
> 
> It is triggered in this case when you have:
> 
>   An event group of breakpoint, cycles, branches
>   An event of instructions with precise=1
>   A tracepoint
> 
> and then you close the tracepoint.

and for what it's worth, it's the cycles event (type=0 config=0) that's 
causing the WARN_ON.

It looks like I'm going to have to resort to sprinkling the code with 
printk()s to solve this one.

Vince
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-12 Thread Vince Weaver
On Tue, 11 Feb 2014, Peter Zijlstra wrote:
> 
> I'll see if I can run through the reproduction case by hand.

I've come up with an even simpler test case with all of the extraneous 
settings removed.  Included below.

It is triggered in this case when you have:

  An event group of breakpoint, cycles, branches
  An event of instructions with precise=1
  A tracepoint

and then you close the tracepoint.

---


/* log_to_code output from ./warning29.log */
/* by Vince Weaver 
#include 
#include 
#include 
#include 
#include 

static int fd[1024];
static struct perf_event_attr pe[1024];

int perf_event_open(struct perf_event_attr *hw_event_uptr,
pid_t pid, int cpu, int group_fd, unsigned long flags) {

return syscall(__NR_perf_event_open,hw_event_uptr, pid, cpu,
group_fd, flags);
}

int main(int argc, char **argv) {

/* Random Seed was 1392048997 */
/* /proc/sys/kernel/perf_event_max_sample_rate was 10 */


memset(&pe[3],0,sizeof(struct perf_event_attr));
pe[3].type=PERF_TYPE_BREAKPOINT;
pe[3].size=72;
pe[3].bp_type=HW_BREAKPOINT_R|HW_BREAKPOINT_W; /*3*/
pe[3].bp_len=0x8;

fd[3]=perf_event_open(&pe[3],
0, /* current thread */
-1, /* all cpus */
-1, /* group leader */
0);

memset(&pe[23],0,sizeof(struct perf_event_attr));
pe[23].type=PERF_TYPE_HARDWARE;
pe[23].config=PERF_COUNT_HW_INSTRUCTIONS;
pe[23].precise_ip=1; /* this needs to be set to trigger? */

fd[23]=perf_event_open(&pe[23],
0, /* current thread */
-1, /* all cpus */
-1, /* group leader */
0 /*0*/ );

memset(&pe[4],0,sizeof(struct perf_event_attr));
pe[4].type=PERF_TYPE_HARDWARE;
pe[4].config=PERF_COUNT_HW_CPU_CYCLES;

fd[4]=perf_event_open(&pe[4],
0, /* current thread */
-1, /* all cpus */
fd[3], /* 3 is group leader */
0 /*0*/ );


memset(&pe[5],0,sizeof(struct perf_event_attr));
pe[5].type=PERF_TYPE_TRACEPOINT;
pe[5].config=0x1d;
/* 29 irq_vectors/error_apic_entry */
/* Config of tracepoint doesn't seem to matter 
*/

fd[5]=perf_event_open(&pe[5],
0, /* current thread */
-1, /* all cpus */
-1, /* New Group Leader */
0 /*0*/ );

memset(&pe[15],0,sizeof(struct perf_event_attr));
pe[15].type=PERF_TYPE_HARDWARE;
pe[15].config=PERF_COUNT_HW_BRANCH_INSTRUCTIONS;

fd[15]=perf_event_open(&pe[15],
0, /* current thread */
-1, /* all cpus */
fd[3], /* 3 is group leader */
0 /*0*/ );
close(fd[5]);

return 0;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-11 Thread Peter Zijlstra
On Mon, Feb 10, 2014 at 04:26:29PM -0500, Vince Weaver wrote:
> On Thu, 30 Jan 2014, Dave Jones wrote:
> 
> > I gave Vince's perf_fuzzer a run, hoping to trigger a different perf bug
> > that I've been seeing. Instead I hit a different bug.
> 
> I've been seeing that WARN_ON for months but it was hard to reproduce.
> After a lot of hassle (and scores or reboots) I managed to narrow this to 
> a small test case.
> 
> Unfortunately it only reproduces on my core2 machine.  My ivb machine 
> won't trigger it because it doesn't support PERF_COUNT_HW_INSTRUCTIONS
> when precise_ip=1; is set.
> 
> This is with 3.14-rc2
> 
> The test case is attached, the warning message from my machine is below.
> 
> Vince
> 
> [   47.296031] [ cut here ]
> [   47.300013] WARNING: CPU: 0 PID: 2821 at 
> arch/x86/kernel/cpu/perf_event.c:1076 x86_pmu_start+0x46/0xee()
> [   47.300013] Modules linked in: cpufreq_userspace cpufreq_stats 
> cpufreq_powersave cpufreq_conservative f71882fg mcs7830 usbnet evdev ohci_pci 
> ohci_hcd pcspkr i2c_nforce2 psmouse serio_raw coretemp video wmi button 
> acpi_cpufreq processor thermal_sys ehci_pci ehci_hcd sg sd_mod usbcore 
> usb_common
> [   47.300013] CPU: 0 PID: 2821 Comm: out Not tainted 3.14.0-rc2 #2

Yes that one is annoying.. I've seen it several times.

Unfortunately I cannot seem to reproduce on either the WSM (which should
have INST.RET as PEBS capable) nor my core2 laptop.

That said, the laptop seems sick on current kernels, so it might be
hiding due to other problems.

I'll see if I can run through the reproduction case by hand.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: x86_pmu_start WARN_ON.

2014-02-10 Thread Vince Weaver
On Thu, 30 Jan 2014, Dave Jones wrote:

> I gave Vince's perf_fuzzer a run, hoping to trigger a different perf bug
> that I've been seeing. Instead I hit a different bug.

I've been seeing that WARN_ON for months but it was hard to reproduce.
After a lot of hassle (and scores or reboots) I managed to narrow this to 
a small test case.

Unfortunately it only reproduces on my core2 machine.  My ivb machine 
won't trigger it because it doesn't support PERF_COUNT_HW_INSTRUCTIONS
when precise_ip=1; is set.

This is with 3.14-rc2

The test case is attached, the warning message from my machine is below.

Vince

[   47.296031] [ cut here ]
[   47.300013] WARNING: CPU: 0 PID: 2821 at 
arch/x86/kernel/cpu/perf_event.c:1076 x86_pmu_start+0x46/0xee()
[   47.300013] Modules linked in: cpufreq_userspace cpufreq_stats 
cpufreq_powersave cpufreq_conservative f71882fg mcs7830 usbnet evdev ohci_pci 
ohci_hcd pcspkr i2c_nforce2 psmouse serio_raw coretemp video wmi button 
acpi_cpufreq processor thermal_sys ehci_pci ehci_hcd sg sd_mod usbcore 
usb_common
[   47.300013] CPU: 0 PID: 2821 Comm: out Not tainted 3.14.0-rc2 #2
[   47.300013] Hardware name: AOpen   DE7000/nMCP7ALPx-DE R1.06 Oct.19.2012, 
BIOS 080015  10/19/2012
[   47.300013]   817f25ad 814e746b 

[   47.300013]  8103bf1c 880037e62420 81012126 
88011fc13400
[   47.300013]  88011b2b1800 88011fc0b940 0021 

[   47.300013] Call Trace:
[   47.300013]  [] ? dump_stack+0x41/0x56
[   47.300013]  [] ? warn_slowpath_common+0x79/0x92
[   47.300013]  [] ? x86_pmu_start+0x46/0xee
[   47.300013]  [] ? x86_pmu_start+0x46/0xee
[   47.300013]  [] ? x86_pmu_enable+0x1d3/0x285
[   47.300013]  [] ? perf_event_context_sched_in+0x6d/0x8d
[   47.300013]  [] ? __perf_event_task_sched_in+0x21/0x108
[   47.300013]  [] ? idle_balance+0x11a/0x157
[   47.300013]  [] ? finish_task_switch+0x40/0xa5
[   47.300013]  [] ? __schedule+0x46a/0x4bd
[   47.300013]  [] ? schedule_timeout+0x1d/0xb4
[   47.300013]  [] ? generic_exec_single+0x3f/0x52
[   47.300013]  [] ? perf_reg_value+0x4c/0x4c
[   47.300013]  [] ? smp_call_function_single+0xdc/0xf2
[   47.300013]  [] ? dump_stack+0x56/0x56
[   47.300013]  [] ? __wait_for_common+0xce/0x14a
[   47.300013]  [] ? try_to_wake_up+0x19a/0x19a
[   47.300013]  [] ? get_tracepoint+0x20/0x53
[   47.300013]  [] ? T.944+0x1c8/0x1c8
[   47.300013]  [] ? wait_rcu_gp+0x3f/0x46
[   47.300013]  [] ? wait_rcu_gp+0x46/0x46
[   47.300013]  [] ? perf_trace_event_unreg+0x2e/0xbd
[   47.300013]  [] ? perf_trace_destroy+0x2e/0x3b
[   47.300013]  [] ? __free_event+0x2d/0x52
[   47.300013]  [] ? perf_event_release_kernel+0x74/0x7b
[   47.300013]  [] ? perf_release+0x10/0x14
[   47.300013]  [] ? __fput+0xdf/0x1a4
[   47.300013]  [] ? task_work_run+0x7f/0x96
[   47.300013]  [] ? int_signal+0x12/0x17
[   47.300013] ---[ end trace 9ccad2f02057baa8 ]---
/* log_to_code output from ./warning29.log */
/* by Vince Weaver 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

int fd[1024];
struct perf_event_attr pe[1024];

int perf_event_open(struct perf_event_attr *hw_event_uptr,
	pid_t pid, int cpu, int group_fd, unsigned long flags) {

	return syscall(__NR_perf_event_open,hw_event_uptr, pid, cpu,
		group_fd, flags);
}

int main(int argc, char **argv) {

	int i;

	for(i=0;i<1024;i++) fd[i]=-1;

/* 1 */
/* Random Seed was 1392048997 */
/* 2 */
/* 3 */
/* /proc/sys/kernel/perf_event_max_sample_rate was 10 */
/* 4 */
/* fd = 3 */

	memset(&pe[3],0,sizeof(struct perf_event_attr));
	pe[3].type=PERF_TYPE_BREAKPOINT;
	pe[3].size=72;
	pe[3].config=0xd9ab819b;
	pe[3].sample_type=0; /* 0 */
	pe[3].read_format=PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING|PERF_FORMAT_ID; /* 7 */
	pe[3].inherit=1;
	pe[3].exclude_kernel=1;
	pe[3].exclude_hv=1;
	pe[3].exclude_idle=1;
	pe[3].freq=1;
	pe[3].inherit_stat=1;
	pe[3].enable_on_exec=1;
	pe[3].precise_ip=1; /* constant skid */
	pe[3].sample_id_all=1;
	pe[3].wakeup_events=0;
	pe[3].bp_type=HW_BREAKPOINT_R|HW_BREAKPOINT_W; /*3*/
	pe[3].bp_addr=0x0;
	pe[3].bp_len=0x8;

	fd[3]=perf_event_open(&pe[3],
getpid(), /* current thread */
-1, /* all cpus */
fd[19], /* 19 is group leader */
PERF_FLAG_FD_NO_GROUP /*1*/ );


/* 5 */
/* fd = 23 */

	memset(&pe[23],0,sizeof(struct perf_event_attr));
	pe[23].type=PERF_TYPE_HARDWARE;
	pe[23].config=PERF_COUNT_HW_INSTRUCTIONS;
	pe[23].sample_type=0; /* 0 */
	pe[23].read_format=PERF_FORMAT_TOTAL_TIME_ENABLED; /* 1 */
	pe[23].exclude_hv=1;
	pe[23].exclude_idle=1;
	pe[23].mmap=1;
	pe[23].enable_on_exec=1;
	pe[23].task=1;
	pe[23].watermark=1;
	pe[23].precise_ip=1; /* constant skid */
	pe[23].mmap_data=1;
	pe[23].sample_id_all=1;
	pe[23].exclude_host=1;
	pe[23].exclude_guest=1;
	pe[23].exclude_callchain_user=1;
	pe[23].wakeup_watermark=0;
	pe[23].bp_type=HW_BREAKPOINT_EMPTY;

	fd[23]=perf_event_open(&pe[2