[tip:x86/trace] x86, trace: Add page fault tracepoints

2013-11-08 Thread tip-bot for Seiji Aguchi
Commit-ID:  d34603b07c4255b2b00a546d34f297ccd50ae4c6
Gitweb: http://git.kernel.org/tip/d34603b07c4255b2b00a546d34f297ccd50ae4c6
Author: Seiji Aguchi 
AuthorDate: Wed, 30 Oct 2013 16:39:03 -0400
Committer:  H. Peter Anvin 
CommitDate: Fri, 8 Nov 2013 14:15:49 -0800

x86, trace: Add page fault tracepoints

This patch introduces page fault tracepoints to the x86 architecture
by switching the IDT.

  Two events, one each for user and kernel space, are introduced at the
  beginning of the page fault handler for tracing.

  - User space event
There is a request for a user-space page fault event, as described below.


https://lkml.kernel.org/r/1368079520-11015-2-git-send-email-fdeslaur+()+gmail+!+com

https://lkml.kernel.org/r/1368079520-11015-1-git-send-email-fdeslaur+()+gmail+!+com

  - Kernel space event:
When measuring overhead in kernel space while investigating performance
issues, we can check whether it comes from page fault events (a usage
sketch follows this list).
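As a rough usage sketch (assuming debugfs is mounted at /sys/kernel/debug and
using the event names defined by this patch, exceptions:user_page_fault and
exceptions:kernel_page_fault), a consumer could enable both events and stream
the trace buffer like this:

/*
 * Minimal userspace sketch: enable both page fault events via tracefs and
 * dump the trace buffer.  The tracefs paths are an assumption about a
 * typical setup, not something this patch mandates.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void write_str(const char *path, const char *val)
{
        int fd = open(path, O_WRONLY);

        if (fd >= 0) {
                write(fd, val, strlen(val));
                close(fd);
        }
}

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd;

        write_str("/sys/kernel/debug/tracing/events/exceptions/user_page_fault/enable", "1");
        write_str("/sys/kernel/debug/tracing/events/exceptions/kernel_page_fault/enable", "1");

        /* Stream records (e.g. "address=... ip=... error_code=...") to stdout. */
        fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
        if (fd < 0)
                return 1;
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, n, stdout);
        close(fd);
        return 0;
}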

Signed-off-by: Seiji Aguchi 
Link: http://lkml.kernel.org/r/52716e67.6090...@hds.com
Signed-off-by: H. Peter Anvin 
---
 arch/x86/include/asm/trace/exceptions.h | 52 +
 arch/x86/mm/Makefile|  2 ++
 arch/x86/mm/fault.c | 13 +
 3 files changed, 67 insertions(+)

diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
new file mode 100644
index 000..86540c0
--- /dev/null
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -0,0 +1,52 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM exceptions
+
+#if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PAGE_FAULT_H
+
+#include <linux/tracepoint.h>
+
+extern void trace_irq_vector_regfunc(void);
+extern void trace_irq_vector_unregfunc(void);
+
+DECLARE_EVENT_CLASS(x86_exceptions,
+
+   TP_PROTO(unsigned long address, struct pt_regs *regs,
+unsigned long error_code),
+
+   TP_ARGS(address, regs, error_code),
+
+   TP_STRUCT__entry(
+   __field(unsigned long, address  )
+   __field(unsigned long, ip   )
+   __field(unsigned long, error_code )
+   ),
+
+   TP_fast_assign(
+   __entry->address = address;
+   __entry->ip = regs->ip;
+   __entry->error_code = error_code;
+   ),
+
+   TP_printk("address=%pf ip=%pf error_code=0x%lx",
+ (void *)__entry->address, (void *)__entry->ip,
+ __entry->error_code) );
+
+#define DEFINE_PAGE_FAULT_EVENT(name)  \
+DEFINE_EVENT_FN(x86_exceptions, name,  \
+   TP_PROTO(unsigned long address, struct pt_regs *regs,   \
+unsigned long error_code), \
+   TP_ARGS(address, regs, error_code), \
+   trace_irq_vector_regfunc,   \
+   trace_irq_vector_unregfunc);
+
+DEFINE_PAGE_FAULT_EVENT(user_page_fault);
+DEFINE_PAGE_FAULT_EVENT(kernel_page_fault);
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE exceptions
+#endif /*  _TRACE_PAGE_FAULT_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 23d8e5f..6a19ad9 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -6,6 +6,8 @@ nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_physaddr.o  := $(nostackp)
 CFLAGS_setup_nx.o  := $(nostackp)
 
+CFLAGS_fault.o := -I$(src)/../include/asm/trace
+
 obj-$(CONFIG_X86_PAT)  += pat_rbtree.o
 obj-$(CONFIG_SMP)  += tlb.o
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fd3e281..f2730cbc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -20,6 +20,9 @@
 #include <linux/kmemcheck.h>	/* kmemcheck_*(), ...		*/
 #include <asm/fixmap.h>		/* VSYSCALL_START		*/
 
+#define CREATE_TRACE_POINTS
+#include <asm/trace/exceptions.h>
+
 /*
  * Page fault error code bits:
  *
@@ -1232,12 +1235,22 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
exception_exit(prev_state);
 }
 
+static void trace_page_fault_entries(struct pt_regs *regs,
+unsigned long error_code)
+{
+   if (user_mode(regs))
+   trace_user_page_fault(read_cr2(), regs, error_code);
+   else
+   trace_kernel_page_fault(read_cr2(), regs, error_code);
+}
+
 dotraplinkage void __kprobes
 trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
enum ctx_state prev_state;
 
prev_state = exception_enter();
+   trace_page_fault_entries(regs, error_code);
__do_page_fault(regs, error_code);
exception_exit(prev_state);
 }
--

[tip:x86/trace] x86, trace: Register exception handler to trace IDT

2013-11-08 Thread tip-bot for Seiji Aguchi
Commit-ID:  25c74b10bacead867478480170083f69cfc0db48
Gitweb: http://git.kernel.org/tip/25c74b10bacead867478480170083f69cfc0db48
Author: Seiji Aguchi 
AuthorDate: Wed, 30 Oct 2013 16:37:00 -0400
Committer:  H. Peter Anvin 
CommitDate: Fri, 8 Nov 2013 14:15:45 -0800

x86, trace: Register exception handler to trace IDT

This patch registers exception handlers for tracing to a trace IDT.

To implement this in set_intr_gate(), this patch does the following
(a rough expansion of the resulting macro is sketched after this list):
 - Register the exception handlers to the trace IDT by prepending
   "trace_" to the handlers' names.
 - Newly introduce trace_page_fault() so that tracepoints can be added
   to it in a subsequent patch.
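For readability, here is roughly what the new set_intr_gate() macro from the
desc.h hunk below expands to for the page fault vector when CONFIG_TRACING is
enabled.  The vector and handler names (X86_TRAP_PF, page_fault,
trace_page_fault) are the ones used elsewhere in this series; the expansion is
only a reading aid, not additional code added by the patch.

/* Approximate expansion of set_intr_gate(X86_TRAP_PF, page_fault): */
do {
        BUG_ON((unsigned)X86_TRAP_PF > 0xFF);
        /* regular IDT: vector 14 -> page_fault stub -> do_page_fault() */
        _set_gate(X86_TRAP_PF, GATE_INTERRUPT, (void *)page_fault,
                  0, 0, __KERNEL_CS);
        /* trace IDT: vector 14 -> trace_page_fault stub -> trace_do_page_fault() */
        _trace_set_gate(X86_TRAP_PF, GATE_INTERRUPT, (void *)trace_page_fault,
                        0, 0, __KERNEL_CS);
} while (0);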

Signed-off-by: Seiji Aguchi 
Link: http://lkml.kernel.org/r/52716dec.5050...@hds.com
Signed-off-by: H. Peter Anvin 
---
 arch/x86/include/asm/desc.h| 28 +++-
 arch/x86/include/asm/hw_irq.h  |  3 +++
 arch/x86/include/asm/segment.h |  3 +++
 arch/x86/include/asm/traps.h   | 20 
 arch/x86/kernel/entry_32.S | 10 ++
 arch/x86/kernel/entry_64.S | 13 -
 arch/x86/kernel/head64.c   |  2 +-
 arch/x86/kernel/kvm.c  |  2 +-
 arch/x86/kernel/traps.c| 28 ++--
 arch/x86/mm/fault.c| 10 ++
 10 files changed, 97 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index d939567..3d73437 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -327,10 +327,25 @@ static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
 {
write_idt_entry(trace_idt_table, entry, gate);
 }
+
+static inline void _trace_set_gate(int gate, unsigned type, void *addr,
+  unsigned dpl, unsigned ist, unsigned seg)
+{
+   gate_desc s;
+
+   pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
+   /*
+* does not need to be atomic because it is only done once at
+* setup time
+*/
+   write_trace_idt_entry(gate, &s);
+}
 #else
 static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
 {
 }
+
+#define _trace_set_gate(gate, type, addr, dpl, ist, seg)
 #endif
 
 static inline void _set_gate(int gate, unsigned type, void *addr,
@@ -353,11 +368,14 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
  * Pentium F0 0F bugfix can have resulted in the mapped
  * IDT being write-protected.
  */
-static inline void set_intr_gate(unsigned int n, void *addr)
-{
-   BUG_ON((unsigned)n > 0xFF);
-   _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
-}
+#define set_intr_gate(n, addr) \
+   do {\
+   BUG_ON((unsigned)n > 0xFF); \
+   _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0,\
+ __KERNEL_CS); \
+   _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
+   0, 0, __KERNEL_CS); \
+   } while (0)
 
 extern int first_system_vector;
 /* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 92b3bae..cba45d9 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -187,6 +187,9 @@ extern __visible void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
 extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
+#ifdef CONFIG_TRACING
+#define trace_interrupt interrupt
+#endif
 
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index c48a950..6f1c3a8 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -214,6 +214,9 @@
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
+#ifdef CONFIG_TRACING
+#define trace_early_idt_handlers early_idt_handlers
+#endif
 
 /*
  * Load a segment. Fall back on loading the zero
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 7036cb6..58d66fe 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -37,6 +37,23 @@ asmlinkage void machine_check(void);
 #endif /* CONFIG_X86_MCE */
 asmlinkage void simd_coprocessor_error(void);
 
+#ifdef CONFIG_TRACING
+asmlinkage void trace_page_fault(void);
+#define trace_divide_error divide_error
+#define trace_bounds bounds
+#define trace_invalid_op invalid_op
+#define trace_device_not_available device_not_available
+#define trace_coprocessor_segment_overrun coprocessor_segment_overrun
+#define trace_invalid_TSS invalid_TSS
+#define trace_segment_not_present segment_not_present
+#define trace_general_protection general_protection
+#define tra

[tip:x86/trace] x86, trace: Delete __trace_alloc_intr_gate()

2013-11-08 Thread tip-bot for Seiji Aguchi
Commit-ID:  ac7956e2699380b8b10146ec2ba8cbe43a03ff7a
Gitweb: http://git.kernel.org/tip/ac7956e2699380b8b10146ec2ba8cbe43a03ff7a
Author: Seiji Aguchi 
AuthorDate: Wed, 30 Oct 2013 16:37:47 -0400
Committer:  H. Peter Anvin 
CommitDate: Fri, 8 Nov 2013 14:15:47 -0800

x86, trace: Delete __trace_alloc_intr_gate()

Currently, irq vector handlers for tracing are registered both in
set_intr_gate() and in __trace_alloc_intr_gate(), which is called from
alloc_intr_gate().  We don't need to do that twice, so let's delete
__trace_alloc_intr_gate() (the duplication is sketched below).
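To make the duplication concrete, this is roughly what alloc_intr_gate()
looked like before this patch, reconstructed from the macros shown earlier in
this series (set_intr_gate() already writes the trace_ handler into the trace
IDT); it is a reading aid, not code quoted verbatim from the tree.

/* Before this patch (reconstructed): */
#define alloc_intr_gate(n, addr)                                        \
        do {                                                            \
                alloc_system_vector(n);                                 \
                /* already writes trace_##addr into the trace IDT */    \
                set_intr_gate(n, addr);                                 \
                /* ...so this second write of trace_##addr is redundant */ \
                __trace_alloc_intr_gate(n, trace_##addr);               \
        } while (0)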

Signed-off-by: Seiji Aguchi 
Link: http://lkml.kernel.org/r/52716e1b.7090...@hds.com
Signed-off-by: H. Peter Anvin 
---
 arch/x86/include/asm/desc.h | 22 --
 1 file changed, 22 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 3d73437..50d033a 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -392,32 +392,10 @@ static inline void alloc_system_vector(int vector)
}
 }
 
-#ifdef CONFIG_TRACING
-static inline void trace_set_intr_gate(unsigned int gate, void *addr)
-{
-   gate_desc s;
-
-   pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
-   write_idt_entry(trace_idt_table, gate, &s);
-}
-
-static inline void __trace_alloc_intr_gate(unsigned int n, void *addr)
-{
-   trace_set_intr_gate(n, addr);
-}
-#else
-static inline void trace_set_intr_gate(unsigned int gate, void *addr)
-{
-}
-
-#define __trace_alloc_intr_gate(n, addr)
-#endif
-
 #define alloc_intr_gate(n, addr)   \
do {\
alloc_system_vector(n); \
set_intr_gate(n, addr); \
-   __trace_alloc_intr_gate(n, trace_##addr);   \
} while (0)
 
 /*
--


[tip:x86/trace] x86, trace: Remove __alloc_intr_gate()

2013-11-08 Thread tip-bot for Seiji Aguchi
Commit-ID:  959c071f0974cda7702d7574647de7ad9259eb57
Gitweb: http://git.kernel.org/tip/959c071f0974cda7702d7574647de7ad9259eb57
Author: Seiji Aguchi 
AuthorDate: Wed, 30 Oct 2013 16:36:08 -0400
Committer:  H. Peter Anvin 
CommitDate: Fri, 8 Nov 2013 14:15:44 -0800

x86, trace: Remove __alloc_intr_gate()

Prepare for turning set_intr_gate() into a macro by removing
__alloc_intr_gate().

This avoids breaking the kernel build when a subsequent patch changes
set_intr_gate() into a macro.

Signed-off-by: Seiji Aguchi 
Link: http://lkml.kernel.org/r/52716db8.1080...@hds.com
Signed-off-by: H. Peter Anvin 
---
 arch/x86/include/asm/desc.h | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index b90e5df..d939567 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -395,15 +395,10 @@ static inline void trace_set_intr_gate(unsigned int gate, void *addr)
 #define __trace_alloc_intr_gate(n, addr)
 #endif
 
-static inline void __alloc_intr_gate(unsigned int n, void *addr)
-{
-   set_intr_gate(n, addr);
-}
-
 #define alloc_intr_gate(n, addr)   \
do {\
alloc_system_vector(n); \
-   __alloc_intr_gate(n, addr); \
+   set_intr_gate(n, addr); \
__trace_alloc_intr_gate(n, trace_##addr);   \
} while (0)
 
--


[tip:x86/urgent] x86/tracing: Add irq_enter/exit() in smp_trace_reschedule_interrupt()

2013-07-04 Thread tip-bot for Seiji Aguchi
Commit-ID:  4787c368a9bca39e173d702389ee2eaf0520abc1
Gitweb: http://git.kernel.org/tip/4787c368a9bca39e173d702389ee2eaf0520abc1
Author: Seiji Aguchi 
AuthorDate: Fri, 28 Jun 2013 14:02:11 -0400
Committer:  Ingo Molnar 
CommitDate: Tue, 2 Jul 2013 09:52:31 +0200

x86/tracing: Add irq_enter/exit() in smp_trace_reschedule_interrupt()

Reschedule vector tracepoints may be called in CPU idle state.
This causes the lockdep warning below.

The tracepoint requires RCU, but for accuracy it also requires
irq_enter() (tracepoints record the irq context); thus, the tracepoint
interrupt handler should be calling irq_enter() and not just
rcu_irq_enter() (irq_enter() calls rcu_irq_enter()).

So, add irq_enter/exit() to smp_trace_reschedule_interrupt() via common
pre/post-processing functions, smp_entering_irq() and exiting_irq()
(exiting_irq() just calls irq_exit() in arch/x86/include/asm/apic.h),
because these can be shared among the reschedule, call_function, and
call_function_single vectors.

[   50.720557] Testing event reschedule_exit:
[   50.721349]
[   50.721502] ===
[   50.721835] [ INFO: suspicious RCU usage. ]
[   50.722169] 3.10.0-rc6-4-gcf910e8 #190 Not tainted
[   50.722582] ---
[   50.722915] 
/c/kernel-tests/src/linux/arch/x86/include/asm/trace/irq_vectors.h:50 
suspicious rcu_dereference_check() usage!
[   50.723770]
[   50.723770] other info that might help us debug this:
[   50.723770]
[   50.724385]
[   50.724385] RCU used illegally from idle CPU!
[   50.724385] rcu_scheduler_active = 1, debug_locks = 0
[   50.725232] RCU used illegally from extended quiescent state!
[   50.725690] no locks held by swapper/0/0.
[   50.726010]
[   50.726010] stack backtrace:
[...]

Signed-off-by: Seiji Aguchi 
Reviewed-by: Steven Rostedt 
Link: http://lkml.kernel.org/r/51cdcfa3.9080...@hds.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/smp.c | 29 ++---
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index f4fe0b8..cdaa347 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -265,23 +265,30 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
 */
 }
 
-void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+static inline void smp_entering_irq(void)
 {
ack_APIC_irq();
+   irq_enter();
+}
+
+void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+{
+   /*
+* Need to call irq_enter() before calling the trace point.
+* __smp_reschedule_interrupt() calls irq_enter/exit() too (in
+* scheduler_ipi(). This is OK, since those functions are allowed
+* to nest.
+*/
+   smp_entering_irq();
trace_reschedule_entry(RESCHEDULE_VECTOR);
__smp_reschedule_interrupt();
trace_reschedule_exit(RESCHEDULE_VECTOR);
+   exiting_irq();
/*
 * KVM uses this interrupt to force a cpu out of guest mode
 */
 }
 
-static inline void call_function_entering_irq(void)
-{
-   ack_APIC_irq();
-   irq_enter();
-}
-
 static inline void __smp_call_function_interrupt(void)
 {
generic_smp_call_function_interrupt();
@@ -290,14 +297,14 @@ static inline void __smp_call_function_interrupt(void)
 
 void smp_call_function_interrupt(struct pt_regs *regs)
 {
-   call_function_entering_irq();
+   smp_entering_irq();
__smp_call_function_interrupt();
exiting_irq();
 }
 
 void smp_trace_call_function_interrupt(struct pt_regs *regs)
 {
-   call_function_entering_irq();
+   smp_entering_irq();
trace_call_function_entry(CALL_FUNCTION_VECTOR);
__smp_call_function_interrupt();
trace_call_function_exit(CALL_FUNCTION_VECTOR);
@@ -312,14 +319,14 @@ static inline void __smp_call_function_single_interrupt(void)
 
 void smp_call_function_single_interrupt(struct pt_regs *regs)
 {
-   call_function_entering_irq();
+   smp_entering_irq();
__smp_call_function_single_interrupt();
exiting_irq();
 }
 
 void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
 {
-   call_function_entering_irq();
+   smp_entering_irq();
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
__smp_call_function_single_interrupt();
trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
--


[tip:x86/trace] x86/tracing: Add config option checking to the definitions of mce handlers

2013-06-23 Thread tip-bot for Seiji Aguchi
Commit-ID:  33e5ff634f07dec26b7ed1fd7f9e32978fe1f2b2
Gitweb: http://git.kernel.org/tip/33e5ff634f07dec26b7ed1fd7f9e32978fe1f2b2
Author: Seiji Aguchi 
AuthorDate: Sat, 22 Jun 2013 07:33:30 -0400
Committer:  Ingo Molnar 
CommitDate: Sun, 23 Jun 2013 11:41:36 +0200

x86/tracing: Add config option checking to the definitions of mce handlers

If CONFIG_X86_MCE_THRESHOLD and CONFIG_X86_THERMAL_VECTOR
are disabled, the kernel build fails as follows.

   arch/x86/built-in.o: In function `trace_threshold_interrupt':
   (.entry.text+0x122b): undefined reference to `smp_trace_threshold_interrupt'
   arch/x86/built-in.o: In function `trace_thermal_interrupt':
   (.entry.text+0x132b): undefined reference to `smp_trace_thermal_interrupt'

In this case, trace_threshold_interrupt/trace_thermal_interrupt do not
need to be defined.

So, add config option checks around their definitions in entry_64.S.

Signed-off-by: Seiji Aguchi 
Cc: rost...@goodmis.org
Link: http://lkml.kernel.org/r/51c58b8a.2080...@hds.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/entry_64.S | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 11eef43..53d6398 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1188,10 +1188,15 @@ apicinterrupt3 POSTED_INTR_VECTOR \
kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
 #endif
 
+#ifdef CONFIG_X86_MCE_THRESHOLD
 apicinterrupt THRESHOLD_APIC_VECTOR \
threshold_interrupt smp_threshold_interrupt
+#endif
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
 apicinterrupt THERMAL_APIC_VECTOR \
thermal_interrupt smp_thermal_interrupt
+#endif
 
 #ifdef CONFIG_SMP
 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
--


[tip:x86/trace] x86, trace: Add irq vector tracepoints

2013-06-20 Thread tip-bot for Seiji Aguchi
Commit-ID:  cf910e83ae23692fdeefc7e506e504c4c468d38a
Gitweb: http://git.kernel.org/tip/cf910e83ae23692fdeefc7e506e504c4c468d38a
Author: Seiji Aguchi 
AuthorDate: Thu, 20 Jun 2013 11:46:53 -0400
Committer:  H. Peter Anvin 
CommitDate: Thu, 20 Jun 2013 22:25:34 -0700

x86, trace: Add irq vector tracepoints

[Purpose of this patch]

As Vaibhav explained in the thread below, tracepoints for irq vectors
are useful.

http://www.spinics.net/lists/mm-commits/msg85707.html


The current interrupt traces from irq_handler_entry and irq_handler_exit
show when an interrupt is handled.  They provide good data about when
the system has switched to kernel space and how it affects the currently
running processes.

There are some IRQ vectors which trigger the system into kernel space,
which are not handled in generic IRQ handlers.  Tracing such events gives
us the information about IRQ interaction with other system events.

The trace also tells where the system is spending its time.  We want to
know which cores are handling interrupts and how they are affecting other
processes in the system.  Also, the trace provides information about when
the cores are idle and which interrupts are changing that state.


On the other hand, my use case is tracing just the local timer event and
getting the value of the instruction pointer.

I previously suggested adding an argument to the local timer event to get
the instruction pointer.  But there is another way to get it with an
external module such as systemtap.  So, I don't need to add any argument
to the irq vector tracepoints now.

[Patch Description]

Vaibhav's patch shared one tracepoint pair, irq_vector_entry/irq_vector_exit,
across all events.  But there is the use case above of tracing a specific
irq vector rather than tracing all events, and in that case we are concerned
about overhead due to unwanted events.

So, add the following tracepoints instead of introducing
irq_vector_entry/exit, so that we can enable them independently:
   - local_timer_vector
   - reschedule_vector
   - call_function_vector
   - call_function_single_vector
   - irq_work_entry_vector
   - error_apic_vector
   - thermal_apic_vector
   - threshold_apic_vector
   - spurious_apic_vector
   - x86_platform_ipi_vector

Also, introduce logic that switches the IDT when the tracepoints are
enabled/disabled, so that the time penalty is zero while the tracepoints
are disabled.  Detailed explanations follow (a sketch of the reg/unreg
callbacks that perform the switch appears after this list).
 - Create trace irq handlers with entering_irq()/exiting_irq().
 - Create a new IDT, trace_idt_table, at boot time by adding logic to
   _set_gate().  It is just a copy of the original IDT.
 - Register the new handlers for tracepoints to the new IDT by introducing
   macros into alloc_intr_gate(), which is called when the irq_vector
   handlers are registered.
 - Add a check of whether irq vector tracing is on/off to
   load_current_idt().  This has to be done below the debug check, for
   these reasons:
   - Switching to the debug IDT may be kicked while tracing is enabled.
   - On the other hand, switching to the trace IDT is kicked only when
     debugging is disabled.

In addition, the new IDT is created only when CONFIG_TRACING is enabled,
to avoid it being used for other purposes.
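The IDT switch itself lives in arch/x86/kernel/tracepoint.c, which appears in
the diffstat below but is truncated in this archive.  What follows is only a
minimal sketch of what such reg/unreg callbacks could look like, assuming a
mutex-protected reference count and ignoring the interaction with the debug
IDT described above; it is not the actual implementation.

#include <linux/irqflags.h>
#include <linux/mutex.h>
#include <linux/smp.h>
#include <asm/desc.h>

static int trace_irq_vector_refcount;
static bool trace_idt_active;
static DEFINE_MUTEX(irq_vector_mutex);

static void switch_idt(void *arg)
{
        unsigned long flags;

        /* Reload this CPU's IDT register with whichever table is now active. */
        local_irq_save(flags);
        if (trace_idt_active)
                load_idt((const struct desc_ptr *)&trace_idt_descr);
        else
                load_idt((const struct desc_ptr *)&idt_descr);
        local_irq_restore(flags);
}

void trace_irq_vector_regfunc(void)
{
        mutex_lock(&irq_vector_mutex);
        /* First irq vector tracepoint enabled: move every CPU to the trace IDT. */
        if (!trace_irq_vector_refcount++) {
                trace_idt_active = true;
                on_each_cpu(switch_idt, NULL, 0);
        }
        mutex_unlock(&irq_vector_mutex);
}

void trace_irq_vector_unregfunc(void)
{
        mutex_lock(&irq_vector_mutex);
        /* Last irq vector tracepoint disabled: switch back to the regular IDT. */
        if (!--trace_irq_vector_refcount) {
                trace_idt_active = false;
                on_each_cpu(switch_idt, NULL, 0);
        }
        mutex_unlock(&irq_vector_mutex);
}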

Signed-off-by: Seiji Aguchi 
Link: http://lkml.kernel.org/r/51c323ed.5050...@hds.com
Signed-off-by: H. Peter Anvin 
Cc: Steven Rostedt 
---
 arch/x86/include/asm/desc.h  |  72 -
 arch/x86/include/asm/entry_arch.h|   8 ++-
 arch/x86/include/asm/hw_irq.h|  17 +
 arch/x86/include/asm/mshyperv.h  |   3 +
 arch/x86/include/asm/trace/irq_vectors.h | 104 +++
 arch/x86/include/asm/uv/uv_bau.h |   3 +
 arch/x86/kernel/Makefile |   1 +
 arch/x86/kernel/apic/Makefile|   1 +
 arch/x86/kernel/apic/apic.c  |  42 +
 arch/x86/kernel/cpu/common.c |   4 +-
 arch/x86/kernel/cpu/mcheck/therm_throt.c |  10 +++
 arch/x86/kernel/cpu/mcheck/threshold.c   |  10 +++
 arch/x86/kernel/entry_32.S   |  12 +++-
 arch/x86/kernel/entry_64.S   |  31 ++---
 arch/x86/kernel/head_64.S|   6 ++
 arch/x86/kernel/irq.c|  13 
 arch/x86/kernel/irq_work.c   |  10 +++
 arch/x86/kernel/smp.c|  30 +
 arch/x86/kernel/tracepoint.c |  57 +
 include/xen/events.h |   3 +
 20 files changed, 422 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index af290b8..1377ecb 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -320,6 +320,19 @@ static inline void set_nmi_gate(int gate, void *addr)
 }
 #endif
 
+#ifdef CONFIG_TRACING
+extern struct desc_ptr trace_idt_descr;
+extern gate_desc trace_idt_table[];
+static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
+{
+   write_idt_entry(trace_idt_table, entry, gate);
+}
+#else
+static inline void write_trace_idt_

[tip:x86/trace] x86, trace: Introduce entering/exiting_irq()

2013-06-20 Thread tip-bot for Seiji Aguchi
Commit-ID:  eddc0e922a3530e0f22cef170229bcae3a7d5e31
Gitweb: http://git.kernel.org/tip/eddc0e922a3530e0f22cef170229bcae3a7d5e31
Author: Seiji Aguchi 
AuthorDate: Thu, 20 Jun 2013 11:45:17 -0400
Committer:  H. Peter Anvin 
CommitDate: Thu, 20 Jun 2013 22:25:01 -0700

x86, trace: Introduce entering/exiting_irq()

When implementing tracepoints in interrupt handlers, if the tracepoints
are simply added in the performance-sensitive path of the interrupt
handlers, they may cause a performance problem due to the time penalty.

To solve the problem, the idea is to prepare non-trace and trace irq
handlers and switch between their IDTs at tracepoint enable/disable time.

So, let's introduce entering_irq()/exiting_irq() for pre/post-processing
of each irq handler.

A way to use them is as follows.

Non-trace irq handler:
smp_irq_handler()
{
        entering_irq();         /* pre-processing of this handler */
        __smp_irq_handler();    /*
                                 * common logic between non-trace and
                                 * trace handlers in a vector.
                                 */
        exiting_irq();          /* post-processing of this handler */
}

Trace irq handler:
smp_trace_irq_handler()
{
        entering_irq();         /* pre-processing of this handler */
        trace_irq_entry();      /* tracepoint for irq entry */
        __smp_irq_handler();    /*
                                 * common logic between non-trace and
                                 * trace handlers in a vector.
                                 */
        trace_irq_exit();       /* tracepoint for irq exit */
        exiting_irq();          /* post-processing of this handler */
}

If the tracepoints could be placed outside entering_irq()/exiting_irq() as
follows, it would look cleaner.

smp_trace_irq_handler()
{
trace_irq_entry();
smp_irq_handler();
trace_irq_exit();
}

But it doesn't work.
The problem is with irq_enter/exit() being called: they must be called
before trace_irq_entry/exit(), because rcu_irq_enter() must be called
before any tracepoints are used, as tracepoints use RCU to synchronize.
As a possible alternative, we may be able to call irq_enter() first, as
follows, if irq_enter() can nest.

smp_trace_irq_handler()
{
irq_entry();
trace_irq_entry();
smp_irq_handler();
trace_irq_exit();
irq_exit();
}

But that doesn't work either.
If irq_enter() is nested, it may incur a time penalty because it has to
check whether it was already called or not.  The time penalty is not
desired in performance-sensitive paths, even if it is tiny.

Signed-off-by: Seiji Aguchi 
Link: http://lkml.kernel.org/r/51c3238d.9040...@hds.com
Signed-off-by: H. Peter Anvin 
Cc: Steven Rostedt 
---
 arch/x86/include/asm/apic.h  | 27 
 arch/x86/kernel/apic/apic.c  | 33 +-
 arch/x86/kernel/cpu/mcheck/therm_throt.c | 14 +++--
 arch/x86/kernel/cpu/mcheck/threshold.c   | 14 +++--
 arch/x86/kernel/irq.c| 18 
 arch/x86/kernel/irq_work.c   | 14 +++--
 arch/x86/kernel/smp.c| 35 
 7 files changed, 109 insertions(+), 46 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3388034..f8119b5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define ARCH_APICTIMER_STOPS_ON_C3 1
 
@@ -687,5 +688,31 @@ extern int default_check_phys_apicid_present(int phys_apicid);
 #endif
 
 #endif /* CONFIG_X86_LOCAL_APIC */
+extern void irq_enter(void);
+extern void irq_exit(void);
+
+static inline void entering_irq(void)
+{
+   irq_enter();
+   exit_idle();
+}
+
+static inline void entering_ack_irq(void)
+{
+   ack_APIC_irq();
+   entering_irq();
+}
+
+static inline void exiting_irq(void)
+{
+   irq_exit();
+}
+
+static inline void exiting_ack_irq(void)
+{
+   irq_exit();
+   /* Ack only at the end to avoid potential reentry */
+   ack_APIC_irq();
+}
 
 #endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 904611b..59ee76f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -919,17 +919,14 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
/*
 * NOTE! We'd better ACK the irq immediately,
 * because timer handling can be slow.
-*/
-   ack_APIC_irq();
-   /*
+*
 * update_process_times() expects us to have done irq_enter().
 * Besides, if we don't timer interrupts ignore the global
 * interrupt lock, which is the WrongThing (tm) to do.
 */
-   irq_enter();
-   exit_idle();
+   entering_ack_irq();
local_apic_timer_interrupt();
-   irq_exit();
+   exiting_irq();
 
   

[tip:x86/trace] x86: Rename variables for debugging

2013-06-20 Thread tip-bot for Seiji Aguchi
Commit-ID:  629f4f9d59a27d8e58aa612e886e6a9a63ea7aeb
Gitweb: http://git.kernel.org/tip/629f4f9d59a27d8e58aa612e886e6a9a63ea7aeb
Author: Seiji Aguchi 
AuthorDate: Thu, 20 Jun 2013 11:45:44 -0400
Committer:  H. Peter Anvin 
CommitDate: Thu, 20 Jun 2013 22:25:13 -0700

x86: Rename variables for debugging

Rename the debugging-related variables to describe their meaning
precisely.

Also, introduce a generic way to switch the IDT by checking the current
state, debug on/off.

Signed-off-by: Seiji Aguchi 
Link: http://lkml.kernel.org/r/51c323a8.7050...@hds.com
Signed-off-by: H. Peter Anvin 
Cc: Steven Rostedt 
---
 arch/x86/include/asm/desc.h  | 47 +---
 arch/x86/kernel/cpu/common.c | 16 +++
 arch/x86/kernel/head_64.S|  2 +-
 arch/x86/kernel/traps.c  |  2 +-
 4 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 8bf1c06..af290b8 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -36,8 +36,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
 extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
-extern struct desc_ptr nmi_idt_descr;
-extern gate_desc nmi_idt_table[];
+extern struct desc_ptr debug_idt_descr;
+extern gate_desc debug_idt_table[];
 
 struct gdt_page {
struct desc_struct gdt[GDT_ENTRIES];
@@ -316,7 +316,7 @@ static inline void set_nmi_gate(int gate, void *addr)
gate_desc s;
 
pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
-   write_idt_entry(nmi_idt_table, gate, &s);
+   write_idt_entry(debug_idt_table, gate, &s);
 }
 #endif
 
@@ -405,4 +405,45 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
 }
 
+#ifdef CONFIG_X86_64
+DECLARE_PER_CPU(u32, debug_idt_ctr);
+static inline bool is_debug_idt_enabled(void)
+{
+   if (this_cpu_read(debug_idt_ctr))
+   return true;
+
+   return false;
+}
+
+static inline void load_debug_idt(void)
+{
+   load_idt((const struct desc_ptr *)&debug_idt_descr);
+}
+#else
+static inline bool is_debug_idt_enabled(void)
+{
+   return false;
+}
+
+static inline void load_debug_idt(void)
+{
+}
+#endif
+
+/*
+ * the load_current_idt() is called with interrupt disabled by local_irq_save()
+ * to avoid races. That way the IDT will always be set back to the expected
+ * descriptor.
+ */
+static inline void load_current_idt(void)
+{
+   unsigned long flags;
+
+   local_irq_save(flags);
+   if (is_debug_idt_enabled())
+   load_debug_idt();
+   else
+   load_idt((const struct desc_ptr *)&idt_descr);
+   local_irq_restore(flags);
+}
 #endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 22018f7..8f6a0f9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1071,8 +1071,8 @@ __setup("clearcpuid=", setup_disablecpuid);
 
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
-struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
-   (unsigned long) nmi_idt_table };
+struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
+   (unsigned long) debug_idt_table };
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 irq_stack_union) __aligned(PAGE_SIZE);
@@ -1148,20 +1148,20 @@ int is_debug_stack(unsigned long addr)
 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
 }
 
-static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
+DEFINE_PER_CPU(u32, debug_idt_ctr);
 
 void debug_stack_set_zero(void)
 {
-   this_cpu_inc(debug_stack_use_ctr);
-   load_idt((const struct desc_ptr *)&nmi_idt_descr);
+   this_cpu_inc(debug_idt_ctr);
+   load_current_idt();
 }
 
 void debug_stack_reset(void)
 {
-   if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
+   if (WARN_ON(!this_cpu_read(debug_idt_ctr)))
return;
-   if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
-   load_idt((const struct desc_ptr *)&idt_descr);
+   if (this_cpu_dec_return(debug_idt_ctr) == 0)
+   load_current_idt();
 }
 
 #else  /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 321d65e..84fb779 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -518,7 +518,7 @@ ENTRY(idt_table)
.skip IDT_ENTRIES * 16
 
.align L1_CACHE_BYTES
-ENTRY(nmi_idt_table)
+ENTRY(debug_idt_table)
.skip IDT_ENTRIES * 16
 
__PAGE_ALIGNED_BSS
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 772e2a8..d27182d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -785,7 +785,7 @@ void __init trap_init(void)
x86_init.irqs.tra