Re: [LTP] BUG: dead loop in PowerPC hcall tracepoint (Was: [PATCH v2] Add ftrace-stress-test to LTP)

2010-10-18 Thread Benjamin Herrenschmidt
On Mon, 2010-10-18 at 11:19 +0800, Li Zefan wrote:
 Cc: Steven
 Cc: Ingo
 Cc: Peter
 Cc: Anton Blanchard an...@samba.org
 Cc: Paul Mackerras pau...@samba.org
 
 For those Cced, more information is here:
 
 http://marc.info/?l=ltp-list&m=128569007015044&w=2
 http://marc.info/?l=ltp-list&m=128696942432669&w=2

Hrm... that's nasty...

Should we have some kind of flag to avoid spin_yield() calling H_CEDE
(or whatever it calls) when tracing an hcall to prevent that ? Anton,
Paulus, any other smart idea ? A TLF_ would do...

Cheers,
Ben.

 02:37, Subrata Modak wrote:
  I get a bigger trace with this patch:
  
  Unable to handle kernel paging request for data at address 0x
  Faulting instruction address: 0xc02133f0
  cpu 0x2: Vector: 300 (Data Access) at [c000d9f8b560]
  pc: c02133f0: .trace_clock_global+0xb4/0x2a0
  lr: c0213458: .trace_clock_global+0x11c/0x2a0
  sp: c000d9f8b7e0
 msr: 8200b032
 dar: 0
   dsisr: 4000
current = 0xc000d9f7d100
paca= 0xc7fc8e00
  pid   = 1667, comm = ftrace_stack_tr
  Unrecoverable FP Unavailable Exception 800 at c16a9540
  cpu 0x0: Vector: 8Unable to handle0 kernel paging r0 equest for data (at
  address 0xbffFe0175b688
  PU UnavaFaulting instruciltion address: 0xac01017fcb
  le) at [c000d9f8a6a0]
  p   pc: c16a9540: etnetre r?  ?f ofro rh ehlepl
  
  
  lr: [c0016a9540: key_type_dns_resolver+0x15110/0x365f8
  sp: c18804e8
 msr: 80001032
current = 0xc000d838d100
paca= 0xc7fc8000
  pid   = 1668, comm = ftrace_stack_ma
   pid   = 1668, cc02226b0 .rb_reserve_next_event+0x20c/0x804
  [c000d9f8b9b0] c0223178 .ring_buffer_lock_reserve
  +0x24c/0x2a4
  [c000d9f8ba40] c022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
  [c000d9f8baf0] c022ec9c .trace_current_buffer_lock_reserve
  +0x44/0x6c
  [c000d9f8bb80] c0011c5c .ftrace_raw_event_hcall_entry
  +0x7c/0x144
  [c000d9f8bc40] c0096624 .__trace_hcall_entry+0xa0/0xec
  [c000d9f8bcd0] c009786c .plpar_hcall_norets+0x50/0xd0
  [c000d9f8bd40] c00749c8 .__spin_yield+0x130/0x15c
  [c000d9f8bdd0] c0213458 .trace_clock_global+0x11c/0x2a0
 
 This is a dead loop:
 
 trace_hcall_entry() -> trace_clock_global() -> trace_hcall_entry() ..
 
 And this is a PPC specific bug. Hope some ppc guys will fix it?
 Or we kill trace_clock_global() if no one actually uses it..
 
 --
 Li Zefan
 
  [c000d9f8be90] c02226b0 .rb_reserve_next_event+0x20c/0x804
  [c000d9f8bfa0] c0223178 .ring_buffer_lock_reserve
  +0x24c/0x2a4
  [c000d9f8c030] c022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
  [c000d9f8c0e0] c022ec9c .trace_current_buffer_lock_reserve
  +0x44/0x6c
  [c000d9f8c170] c0011c5c .ftrace_raw_event_hcall_entry
  +0x7c/0x144
  [c000d9f8c230] c0096624 .__trace_hcall_entry+0xa0/0xec
  [c000d9f8c2c0] c009786c .plpar_hcall_norets+0x50/0xd0
  [c000d9f8c330] c00749c8 .__spin_yield+0x130/0x15c
  [c000d9f8c3c0] c0213458 .trace_clock_global+0x11c/0x2a0
  [c000d9f8c480] c02226b0 .rb_reserve_next_event+0x20c/0x804
  [c000d9f8c590] c0223178 .ring_buffer_lock_reserve
  +0x24c/0x2a4
  [c000d9f8c620] c022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
  [c000d9f8c6d0] c022ec9c .trace_current_buffer_lock_reserve
  +0x44/0x6c
  [c000d9f8c760] c0011c5c .ftrace_raw_event_hcall_entry
  +0x7c/0x144
  [c000d9f8c820] c0096624 .__trace_hcall_entry+0xa0/0xec
  [c000d9f8c8b0] c009786c .plpar_hcall_norets+0x50/0xd0
  [c000d9f8c920] c00749c8 .__spin_yield+0x130/0x15c
  [c000d9f8c9b0] c0213458 .trace_clock_global+0x11c/0x2a0
  [c000d9f8ca70] c02226b0 .rb_reserve_next_event+0x20c/0x804
  [c000d9f8cb80] c0223178 .ring_buffer_lock_reserve
  +0x24c/0x2a4
  [c000d9f8cc10] c022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
  [c000d9f8ccc0] c022ec9c .trace_current_buffer_lock_reserve
  +0x44/0x6c
  [c000d9f8cd50] c0011c5c .ftrace_raw_event_hcall_entry
  +0x7c/0x144
  [c000d9f8ce10] c0096624 .__trace_hcall_entry+0xa0/0xec
  [c000d9f8cea0] c009786c .plpar_hcall_norets+0x50/0xd0
  [c000d9f8cf10] c00749c8 .__spin_yield+0x130/0x15c
  [c000d9f8cfa0] c0213458 .trace_clock_global+0x11c/0x2a0
  [c000d9f8d060] c02226b0 .rb_reserve_next_event+0x20c/0x804
  [c000d9f8d170] c0223178 .ring_buffer_lock_reserve
  +0x24c/0x2a4
  [c000d9f8d200] c022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
  [c000d9f8d2b0] c022ec9c .trace_current_buffer_lock_reserve
  +0x44/0x6c
  [c000d9f8d340] c0011c5c .ftrace_raw_event_hcall_entry
  +0x7c/0x144
  [c000d9f8d400] c0096624 

Re: [LTP] BUG: dead loop in PowerPC hcall tracepoint (Was: [PATCH v2] Add ftrace-stress-test to LTP)

2010-10-18 Thread Steven Rostedt
On Mon, 2010-10-18 at 11:19 +0800, Li Zefan wrote:

 This is a dead loop:
 
 trace_hcall_entry() -> trace_clock_global() -> trace_hcall_entry() ..
 
 And this is a PPC specific bug. Hope some ppc guys will fix it?
 Or we kill trace_clock_global() if no one actually uses it..

trace_clock_global() is used by many. I use it (and recommend using it)
on boxes where the TSC is horribly out of sync, and the trace needs
synchronization between CPUs.

trace_hcall_entry and trace_hcall_exit already have wrappers. Just add recursion
protection there.

Perhaps something like this:

(Neither compiled nor run)

+static DEFINE_PER_CPU(hcall_trace_disable);
+
 void hcall_tracepoint_regfunc(void)
 {
hcall_tracepoint_refcount++;
 }

 void hcall_tracepoint_unregfunc(void)
 {
hcall_tracepoint_refcount--;
 }

+int __trace_disable_check(void)
+{
+   if (!hcall_tracepoint_refcount)
+   return 1;
+
+   if (get_cpu_var(hcall_trace_disable)) {
+   put_cpu_var(hcall_trace_disable);
+   return 1;
+   }
+
+   __get_cpu_var(hcall_trace_disable)++;
+
+   return 0;
+}
+
+void __trace_disable_put(void)
+{
+   __get_cpu_var(hcall_trace_disable)--;
+   put_cpu_var(hcall_trace_disable);
+}
+
 void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
 {
+   int trace_disable;
+
+   if (__trace_disable_check())
+   return;
+
trace_hcall_entry(opcode, args);
+   __trace_disable_put();
 }

 void __trace_hcall_exit(long opcode, unsigned long retval,
unsigned long *retbuf)
 {
+   if (__trace_disable_check())
+   return;
+
trace_hcall_exit(opcode, retval, retbuf);
+   __trace_disable_put();
 }

-- Steve



--
Download new Adobe(R) Flash(R) Builder(TM) 4
The new Adobe(R) Flex(R) 4 and Flash(R) Builder(TM) 4 (formerly 
Flex(R) Builder(TM)) enable the development of rich applications that run
across multiple browsers and platforms. Download your free trials today!
http://p.sf.net/sfu/adobe-dev2dev
___
Ltp-list mailing list
Ltp-list@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/ltp-list


Re: [LTP] BUG: dead loop in PowerPC hcall tracepoint (Was: [PATCH v2] Add ftrace-stress-test to LTP)

2010-10-18 Thread Li Zefan
Steven Rostedt wrote:
 On Mon, 2010-10-18 at 11:19 +0800, Li Zefan wrote:
 
 This is a dead loop:

 trace_hcall_entry() -> trace_clock_global() -> trace_hcall_entry() ..

 And this is a PPC specific bug. Hope some ppc guys will fix it?
 Or we kill trace_clock_global() if no one actually uses it..
 
 trace_clock_global() is used by many. I use it (and recommend using it)
 on boxes where the TSC is horribly out of sync, and the trace needs
 synchronization between CPUs.
 
 The trace_hcall_entry and exit has wrappers already. Just add recursion
 protection there.
 

Right, I thought of this. But as I have no machine to test, I'll leave
this to others.

 Perhaps something like this:
 
 (Not compiled nor ran)
 
 +static DEFINE_PER_CPU(hcall_trace_disable);
 +
  void hcall_tracepoint_regfunc(void)
  {
   hcall_tracepoint_refcount++;
  }
 
  void hcall_tracepoint_unregfunc(void)
  {
   hcall_tracepoint_refcount--;
  }
 
 +int __trace_disable_check(void)
 +{
 + if (!hcall_tracepoint_refcount)
 + return 1;
 +
 + if (get_cpu_var(hcall_trace_disable)) {
 + put_cpu_var(hcall_trace_disable);
 + return 1;
 + }
 +
 + __get_cpu_var(hcall_trace_disable)++;
 +
 + return 0;
 +}
 +
 +void __trace_disable_put(void)
 +{
 + __get_cpu_var(hcall_trace_disable)--;
 + put_cpu_var(hcall_trace_disable);
 +}
 +
  void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
  {
 + int trace_disable;
 +
 + if (__trace_disable_check())
 + return;
 +
   trace_hcall_entry(opcode, args);
 + __trace_disable_put();
  }
 
  void __trace_hcall_exit(long opcode, unsigned long retval,
   unsigned long *retbuf)
  {
 + if (__trace_disable_check())
 + return;
 +
   trace_hcall_exit(opcode, retval, retbuf);
 + __trace_disable_put();
  }
 
 -- Steve
 
 
 
 

--
Download new Adobe(R) Flash(R) Builder(TM) 4
The new Adobe(R) Flex(R) 4 and Flash(R) Builder(TM) 4 (formerly 
Flex(R) Builder(TM)) enable the development of rich applications that run
across multiple browsers and platforms. Download your free trials today!
http://p.sf.net/sfu/adobe-dev2dev
___
Ltp-list mailing list
Ltp-list@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/ltp-list