> > @@ -1156,8 +1156,14 @@ void debug_stack_reset(void)
Thank you for reviewing.

> >  {
> >     if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
> >             return;
> > -   if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
> > -           load_idt((const struct desc_ptr *)&idt_descr);
> > +   if (this_cpu_dec_return(debug_stack_use_ctr) == 0) {
> > +#ifdef CONFIG_TRACING
> > +           if (this_cpu_read(trace_idt_in_use))
> > +                   load_idt((const struct desc_ptr *)&trace_idt_descr);
> > +           else
> > +#endif
> > +                   load_idt((const struct desc_ptr *)&idt_descr);
> > +   }
> 
> I have a patch that makes this more generic. I'm wondering if that would
> be better than having everything so coupled.
>

Do you mean it should be discussed in another thread as you posted "[PATCH] 
x86: Have debug/nmi restore the IDT it replaced"?

Anyway, I will update my patch in accordance with your comments.

Seiji

> -----Original Message-----
> From: Steven Rostedt [mailto:[email protected]]
> Sent: Thursday, May 23, 2013 3:52 PM
> To: Seiji Aguchi
> Cc: [email protected]; [email protected]; [email protected]; Thomas 
> Gleixner ([email protected]); '[email protected]'
> ([email protected]); Borislav Petkov ([email protected]); 
> [email protected]; Luck, Tony ([email protected]); dle-
> [email protected]; Tomoki Sekiyama
> Subject: Re: [PATCH v12 2/3] trace,x86: add x86 irq vector tracepoints
> 
> On Fri, 2013-04-05 at 19:20 +0000, Seiji Aguchi wrote:
> > Signed-off-by: Seiji Aguchi <[email protected]>
> > ---
> >  arch/x86/include/asm/desc.h              |   36 +++++++-
> >  arch/x86/include/asm/entry_arch.h        |    5 +-
> >  arch/x86/include/asm/hw_irq.h            |   16 +++
> >  arch/x86/include/asm/mshyperv.h          |    1 +
> >  arch/x86/include/asm/trace/irq_vectors.h |  159 
> > ++++++++++++++++++++++++++++++
> >  arch/x86/kernel/Makefile                 |    1 +
> >  arch/x86/kernel/apic/apic.c              |   94 ++++++++++++++++++
> >  arch/x86/kernel/cpu/common.c             |   12 ++-
> >  arch/x86/kernel/cpu/mcheck/therm_throt.c |   14 +++
> >  arch/x86/kernel/cpu/mcheck/threshold.c   |   14 +++
> >  arch/x86/kernel/entry_32.S               |   12 ++-
> >  arch/x86/kernel/entry_64.S               |   29 +++++-
> >  arch/x86/kernel/head_64.S                |    6 +
> >  arch/x86/kernel/irq.c                    |   23 +++++
> >  arch/x86/kernel/irq_work.c               |   12 +++
> >  arch/x86/kernel/smp.c                    |   35 +++++++
> >  arch/x86/kernel/tracepoint.c             |   71 +++++++++++++
> >  include/xen/events.h                     |    3 +
> >  18 files changed, 529 insertions(+), 14 deletions(-)
> >  create mode 100644 arch/x86/include/asm/trace/irq_vectors.h
> >  create mode 100644 arch/x86/kernel/tracepoint.c
> >
> > diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
> > index 8bf1c06..e718c72 100644
> > --- a/arch/x86/include/asm/desc.h
> > +++ b/arch/x86/include/asm/desc.h
> > @@ -39,6 +39,9 @@ extern gate_desc idt_table[];
> >  extern struct desc_ptr nmi_idt_descr;
> >  extern gate_desc nmi_idt_table[];
> >
> > +extern struct desc_ptr trace_idt_descr;
> > +extern DEFINE_PER_CPU(u32, trace_idt_in_use);
> > +extern DEFINE_PER_CPU(u32, debug_stack_use_ctr);
> 
> Header files should use DECLARE_PER_CPU() not DEFINE_PER_CPU. And drop
> the "extern" when doing so, as it's implicit in the macro.
> 
> 
> >  struct gdt_page {
> >     struct desc_struct gdt[GDT_ENTRIES];
> >  } __attribute__((aligned(PAGE_SIZE)));
> > @@ -320,6 +323,17 @@ static inline void set_nmi_gate(int gate, void *addr)
> >  }
> >  #endif
> >
> > +#ifdef CONFIG_TRACING
> > +extern gate_desc trace_idt_table[];
> > +static inline void trace_set_intr_gate(unsigned int gate, void *addr)
> > +{
> > +   gate_desc s;
> > +
> > +   pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
> > +   write_idt_entry(trace_idt_table, gate, &s);
> > +}
> 
> Perhaps add a:
> 
> static inline void write_trace_idt_entry(gate_desc *idt, int entry,
>                               const gate_desc *gate)
> {
>       write_idt_entry(idt, entry, gate);
> }
> #else
> static inline void write_trace_idt_entry(gate_desc *idt, int entry,
>                               const gate_desc *gate)
> {
> }
> > +#endif
> 
> Then you can below just do:
> 
> 
> > +
> >  static inline void _set_gate(int gate, unsigned type, void *addr,
> >                          unsigned dpl, unsigned ist, unsigned seg)
> >  {
> > @@ -331,6 +345,9 @@ static inline void _set_gate(int gate, unsigned type, 
> > void *addr,
> >      * setup time
> >      */
> >     write_idt_entry(idt_table, gate, &s);
> 
>       write_trace_idt_entry(idt_table, gate, &s);
> 
> and remove the #ifdef. Makes it nice to read.
> 
> > +#ifdef CONFIG_TRACING
> > +   write_idt_entry(trace_idt_table, gate, &s);
> > +#endif
> >  }
> >
> >  /*
> > @@ -360,12 +377,27 @@ static inline void alloc_system_vector(int vector)
> >     }
> >  }
> >
> > -static inline void alloc_intr_gate(unsigned int n, void *addr)
> > +#ifdef CONFIG_TRACING
> > +static inline void __trace_alloc_intr_gate(unsigned int n, void *addr)
> > +{
> > +   trace_set_intr_gate(n, addr);
> > +}
> > +#else
> > +#define __trace_alloc_intr_gate(n, addr)
> > +#endif
> > +
> > +static inline void __alloc_intr_gate(unsigned int n, void *addr)
> >  {
> > -   alloc_system_vector(n);
> >     set_intr_gate(n, addr);
> >  }
> >
> > +#define alloc_intr_gate(n, addr)                           \
> > +   do {                                                    \
> > +           alloc_system_vector(n);                         \
> > +           __alloc_intr_gate(n, addr);                     \
> > +           __trace_alloc_intr_gate(n, trace_##addr);       \
> > +   } while (0)
> > +
> >  /*
> >   * This routine sets up an interrupt gate at directory privilege level 3.
> >   */
> > diff --git a/arch/x86/include/asm/entry_arch.h 
> > b/arch/x86/include/asm/entry_arch.h
> > index 40afa00..0bb99d8 100644
> > --- a/arch/x86/include/asm/entry_arch.h
> > +++ b/arch/x86/include/asm/entry_arch.h
> > @@ -13,8 +13,9 @@
> >  BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
> >  BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
> >  BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
> > -BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
> > -BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
> > +BUILD_INTERRUPT3(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR,
> > +            smp_irq_move_cleanup_interrupt)
> > +BUILD_INTERRUPT3(reboot_interrupt, REBOOT_VECTOR, smp_reboot_interrupt)
> >  #endif
> >
> >  BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
> > diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
> > index 10a78c3..e84e91a 100644
> > --- a/arch/x86/include/asm/hw_irq.h
> > +++ b/arch/x86/include/asm/hw_irq.h
> > @@ -76,6 +76,22 @@ extern void threshold_interrupt(void);
> >  extern void call_function_interrupt(void);
> >  extern void call_function_single_interrupt(void);
> >
> > +#ifdef CONFIG_TRACING
> > +/* Interrupt handlers registered during init_IRQ */
> > +extern void trace_apic_timer_interrupt(void);
> > +extern void trace_x86_platform_ipi(void);
> > +extern void trace_error_interrupt(void);
> > +extern void trace_irq_work_interrupt(void);
> > +extern void trace_spurious_interrupt(void);
> > +extern void trace_thermal_interrupt(void);
> > +extern void trace_reschedule_interrupt(void);
> > +extern void trace_threshold_interrupt(void);
> > +extern void trace_call_function_interrupt(void);
> > +extern void trace_call_function_single_interrupt(void);
> > +#define trace_irq_move_cleanup_interrupt  irq_move_cleanup_interrupt
> > +#define trace_reboot_interrupt  reboot_interrupt
> > +#endif /* CONFIG_TRACING */
> > +
> >  /* IOAPIC */
> >  #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & 
> > io_apic_irqs))
> >  extern unsigned long io_apic_irqs;
> > diff --git a/arch/x86/include/asm/mshyperv.h 
> > b/arch/x86/include/asm/mshyperv.h
> > index c2934be..cc40e03 100644
> > --- a/arch/x86/include/asm/mshyperv.h
> > +++ b/arch/x86/include/asm/mshyperv.h
> > @@ -12,6 +12,7 @@ struct ms_hyperv_info {
> >  extern struct ms_hyperv_info ms_hyperv;
> >
> >  void hyperv_callback_vector(void);
> > +#define trace_hyperv_callback_vector hyperv_callback_vector
> >  void hyperv_vector_handler(struct pt_regs *regs);
> >  void hv_register_vmbus_handler(int irq, irq_handler_t handler);
> >
> > diff --git a/arch/x86/include/asm/trace/irq_vectors.h 
> > b/arch/x86/include/asm/trace/irq_vectors.h
> > new file mode 100644
> > index 0000000..b4f1c53
> > --- /dev/null
> > +++ b/arch/x86/include/asm/trace/irq_vectors.h
> > @@ -0,0 +1,159 @@
> > +#undef TRACE_SYSTEM
> > +#define TRACE_SYSTEM irq_vectors
> > +
> > +#if !defined(_TRACE_IRQ_VECTORS_H) || defined(TRACE_HEADER_MULTI_READ)
> > +#define _TRACE_IRQ_VECTORS_H
> > +
> > +#include <linux/tracepoint.h>
> > +
> > +extern void trace_irq_vector_regfunc(void);
> > +extern void trace_irq_vector_unregfunc(void);
> > +
> > +DECLARE_EVENT_CLASS(x86_irq_vector,
> > +
> > +   TP_PROTO(int vector),
> > +
> > +   TP_ARGS(vector),
> > +
> > +   TP_STRUCT__entry(
> > +           __field(                int,    vector  )
> > +   ),
> > +
> > +   TP_fast_assign(
> > +           __entry->vector = vector;
> > +   ),
> > +
> > +   TP_printk("vector=%d", __entry->vector) );
> > +
> > +#define DEFINE_IRQ_VECTOR_EVENT(name)      \
> > +DEFINE_EVENT_FN(x86_irq_vector, name,      \
> > +   TP_PROTO(int vector),           \
> > +   TP_ARGS(vector),                \
> > +   trace_irq_vector_regfunc,       \
> > +   trace_irq_vector_unregfunc);
> > +
> > +/*
> > + * local_timer_entry - called before entering a local timer interrupt
> > + * vector handler
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(local_timer_entry);
> > +
> > +/*
> > + * local_timer_exit - called immediately after the interrupt vector
> > + * handler returns
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(local_timer_exit);
> 
> Looks like you can save on repeat code by doing the following:
> 
> #define DEFINE_IRQ_VECTOR_EVENT(name)         \
> DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \
>       TP_PROTO(int vector),                   \
>       TP_ARGS(vector),                        \
>       trace_irq_vector_regfunc,               \
>       trace_irq_vector_unregfunc);            \
> DEFINE_EVENT_FN(x86_irq_vector, name##_exit,  \
>       TP_PROTO(int vector),                   \
>       TP_ARGS(vector),                        \
>       trace_irq_vector_regfunc,               \
>       trace_irq_vector_unregfunc);            \
> 
> Then just do for each:
> 
> DEFINE_IRQ_VECTOR_EVENT(local_timer);
> 
> [...]
> 
> > +
> > +/*
> > + * reschedule_entry - called before entering a reschedule vector handler
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(reschedule_entry);
> > +
> > +/*
> > + * reschedule_exit - called immediately after the interrupt vector
> > + * handler returns
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(reschedule_exit);
> > +
> > +/*
> > + * spurious_apic_entry - called before entering a spurious apic vector 
> > handler
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(spurious_apic_entry);
> > +
> > +/*
> > + * spurious_apic_exit - called immediately after the interrupt vector
> > + * handler returns
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(spurious_apic_exit);
> > +
> > +/*
> > + * error_apic_entry - called before entering an error apic vector handler
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(error_apic_entry);
> > +
> > +/*
> > + * error_apic_exit - called immediately after the interrupt vector
> > + * handler returns
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(error_apic_exit);
> > +
> > +/*
> > + * x86_platform_ipi_entry - called before entering a x86 platform ipi 
> > interrupt
> > + * vector handler
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi_entry);
> > +
> > +/*
> > + * x86_platform_ipi_exit - called immediately after the interrupt vector
> > + * handler returns
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi_exit);
> > +
> > +/*
> > + * irq_work_entry - called before entering a irq work interrupt
> > + * vector handler
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(irq_work_entry);
> > +
> > +/*
> > + * irq_work_exit - called immediately after the interrupt vector
> > + * handler returns
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(irq_work_exit);
> > +
> > +/*
> > + * call_function_entry - called before entering a call function interrupt
> > + * vector handler
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(call_function_entry);
> > +
> > +/*
> > + * call_function_exit - called immediately after the interrupt vector
> > + * handler returns
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(call_function_exit);
> > +
> > +/*
> > + * call_function_single_entry - called before entering a call function
> > + * single interrupt vector handler
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(call_function_single_entry);
> > +
> > +/*
> > + * call_function_single_exit - called immediately after the interrupt 
> > vector
> > + * handler returns
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(call_function_single_exit);
> > +
> > +/*
> > + * threshold_apic_entry - called before entering a threshold apic interrupt
> > + * vector handler
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(threshold_apic_entry);
> > +
> > +/*
> > + * threshold_apic_exit - called immediately after the interrupt vector
> > + * handler returns
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(threshold_apic_exit);
> > +
> > +/*
> > + * thermal_apic_entry - called before entering a thermal apic interrupt
> > + * vector handler
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(thermal_apic_entry);
> > +
> > +/*
> > + * thrmal_apic_exit - called immediately after the interrupt vector
> > + * handler returns
> > + */
> > +DEFINE_IRQ_VECTOR_EVENT(thermal_apic_exit);
> > +
> > +#undef TRACE_INCLUDE_PATH
> > +#define TRACE_INCLUDE_PATH ../../arch/x86/include/asm/trace
> 
> Hmm, this is dangerous. It's better to use the Makefile.
> 
> here do:
> 
> #define TRACE_INCLUDE_PATH .
> 
> > +#define TRACE_INCLUDE_FILE irq_vectors
> > +#endif /*  _TRACE_IRQ_VECTORS_H */
> 
> Then in arch/x86/kernel/apic/Makefile:
> 
> CFLAGS_apic.o := -I$(src)
> 
> > +
> > +/* This part must be outside protection */
> > +#include <trace/define_trace.h>
> > +
> > diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> > index 7bd3bd3..74b3891 100644
> > --- a/arch/x86/kernel/Makefile
> > +++ b/arch/x86/kernel/Makefile
> > @@ -102,6 +102,7 @@ obj-$(CONFIG_OF)                        += devicetree.o
> >  obj-$(CONFIG_UPROBES)                      += uprobes.o
> >
> >  obj-$(CONFIG_PERF_EVENTS)          += perf_regs.o
> > +obj-$(CONFIG_TRACING)                      += tracepoint.o
> >
> >  ###
> >  # 64 bit specific files
> > diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
> > index 904611b..7d39c68 100644
> > --- a/arch/x86/kernel/apic/apic.c
> > +++ b/arch/x86/kernel/apic/apic.c
> > @@ -55,6 +55,9 @@
> >  #include <asm/tsc.h>
> >  #include <asm/hypervisor.h>
> >
> > +#define CREATE_TRACE_POINTS
> > +#include <asm/trace/irq_vectors.h>
> > +
> >  unsigned int num_processors;
> >
> >  unsigned disabled_cpus __cpuinitdata;
> > @@ -934,6 +937,30 @@ void __irq_entry smp_apic_timer_interrupt(struct 
> > pt_regs *regs)
> >     set_irq_regs(old_regs);
> >  }
> >
> > +void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
> > +{
> > +   struct pt_regs *old_regs = set_irq_regs(regs);
> > +
> > +   /*
> > +    * NOTE! We'd better ACK the irq immediately,
> > +    * because timer handling can be slow.
> > +    */
> > +   ack_APIC_irq();
> > +   /*
> > +    * update_process_times() expects us to have done irq_enter().
> > +    * Besides, if we don't timer interrupts ignore the global
> > +    * interrupt lock, which is the WrongThing (tm) to do.
> > +    */
> > +   irq_enter();
> > +   exit_idle();
> > +   trace_local_timer_entry(LOCAL_TIMER_VECTOR);
> > +   local_apic_timer_interrupt();
> > +   trace_local_timer_exit(LOCAL_TIMER_VECTOR);
> > +   irq_exit();
> > +
> > +   set_irq_regs(old_regs);
> > +}
> > +
> >  int setup_profiling_timer(unsigned int multiplier)
> >  {
> >     return -EINVAL;
> > @@ -1930,6 +1957,31 @@ void smp_spurious_interrupt(struct pt_regs *regs)
> >     irq_exit();
> >  }
> >
> > +void smp_trace_spurious_interrupt(struct pt_regs *regs)
> > +{
> > +   u32 v;
> > +
> > +   irq_enter();
> > +   exit_idle();
> > +   trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR);
> > +   /*
> > +    * Check if this really is a spurious interrupt and ACK it
> > +    * if it is a vectored one.  Just in case...
> > +    * Spurious interrupts should not be ACKed.
> > +    */
> > +   v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
> > +   if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
> > +           ack_APIC_irq();
> > +
> > +   inc_irq_stat(irq_spurious_count);
> > +
> > +   /* see sw-dev-man vol 3, chapter 7.4.13.5 */
> > +   pr_info("spurious APIC interrupt on CPU#%d, "
> > +           "should never happen.\n", smp_processor_id());
> > +   trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR);
> > +   irq_exit();
> > +}
> > +
> >  /*
> >   * This interrupt should never happen with our APIC/SMP architecture
> >   */
> > @@ -1973,6 +2025,48 @@ void smp_error_interrupt(struct pt_regs *regs)
> >     irq_exit();
> >  }
> >
> > +void smp_trace_error_interrupt(struct pt_regs *regs)
> > +{
> > +   u32 v0, v1;
> > +   u32 i = 0;
> > +   static const char * const error_interrupt_reason[] = {
> > +           "Send CS error",                /* APIC Error Bit 0 */
> > +           "Receive CS error",             /* APIC Error Bit 1 */
> > +           "Send accept error",            /* APIC Error Bit 2 */
> > +           "Receive accept error",         /* APIC Error Bit 3 */
> > +           "Redirectable IPI",             /* APIC Error Bit 4 */
> > +           "Send illegal vector",          /* APIC Error Bit 5 */
> > +           "Received illegal vector",      /* APIC Error Bit 6 */
> > +           "Illegal register address",     /* APIC Error Bit 7 */
> > +   };
> > +
> > +   irq_enter();
> > +   exit_idle();
> > +   trace_error_apic_entry(ERROR_APIC_VECTOR);
> > +   /* First tickle the hardware, only then report what went on. -- REW */
> > +   v0 = apic_read(APIC_ESR);
> > +   apic_write(APIC_ESR, 0);
> > +   v1 = apic_read(APIC_ESR);
> > +   ack_APIC_irq();
> > +   atomic_inc(&irq_err_count);
> > +
> > +   apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)",
> > +               smp_processor_id(), v0 , v1);
> > +
> > +   v1 = v1 & 0xff;
> > +   while (v1) {
> > +           if (v1 & 0x1)
> > +                   apic_printk(APIC_DEBUG, KERN_CONT " : %s", 
> > error_interrupt_reason[i]);
> > +           i++;
> > +           v1 >>= 1;
> > +   }
> > +
> > +   apic_printk(APIC_DEBUG, KERN_CONT "\n");
> > +
> > +   trace_error_apic_exit(ERROR_APIC_VECTOR);
> > +   irq_exit();
> > +}
> > +
> >  /**
> >   * connect_bsp_APIC - attach the APIC to the interrupt system
> >   */
> > diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> > index d814772..23045cd 100644
> > --- a/arch/x86/kernel/cpu/common.c
> > +++ b/arch/x86/kernel/cpu/common.c
> > @@ -1144,7 +1144,7 @@ int is_debug_stack(unsigned long addr)
> >              addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
> >  }
> >
> > -static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
> > +DEFINE_PER_CPU(u32, debug_stack_use_ctr);
> >
> >  void debug_stack_set_zero(void)
> >  {
> > @@ -1156,8 +1156,14 @@ void debug_stack_reset(void)
> >  {
> >     if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
> >             return;
> > -   if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
> > -           load_idt((const struct desc_ptr *)&idt_descr);
> > +   if (this_cpu_dec_return(debug_stack_use_ctr) == 0) {
> > +#ifdef CONFIG_TRACING
> > +           if (this_cpu_read(trace_idt_in_use))
> > +                   load_idt((const struct desc_ptr *)&trace_idt_descr);
> > +           else
> > +#endif
> > +                   load_idt((const struct desc_ptr *)&idt_descr);
> > +   }
> 
> I have a patch that makes this more generic. I'm wondering if that would
> be better than having everything so coupled.
> 
> >  }
> >
> >  #else      /* CONFIG_X86_64 */
> > diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c 
> > b/arch/x86/kernel/cpu/mcheck/therm_throt.c
> > index 47a1870..e7aa7fc 100644
> > --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
> > +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
> > @@ -29,6 +29,7 @@
> >  #include <asm/idle.h>
> >  #include <asm/mce.h>
> >  #include <asm/msr.h>
> > +#include <asm/trace/irq_vectors.h>
> >
> >  /* How long to wait between reporting thermal events */
> >  #define CHECK_INTERVAL             (300 * HZ)
> > @@ -389,6 +390,19 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs 
> > *regs)
> >     ack_APIC_irq();
> >  }
> >
> > +asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs)
> > +{
> > +   irq_enter();
> > +   exit_idle();
> > +   trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
> > +   inc_irq_stat(irq_thermal_count);
> > +   smp_thermal_vector();
> > +   trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
> > +   irq_exit();
> > +   /* Ack only at the end to avoid potential reentry */
> > +   ack_APIC_irq();
> > +}
> > +
> >  /* Thermal monitoring depends on APIC, ACPI and clock modulation */
> >  static int intel_thermal_supported(struct cpuinfo_x86 *c)
> >  {
> > diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c 
> > b/arch/x86/kernel/cpu/mcheck/threshold.c
> > index aa578ca..0cbef99 100644
> > --- a/arch/x86/kernel/cpu/mcheck/threshold.c
> > +++ b/arch/x86/kernel/cpu/mcheck/threshold.c
> > @@ -8,6 +8,7 @@
> >  #include <asm/apic.h>
> >  #include <asm/idle.h>
> >  #include <asm/mce.h>
> > +#include <asm/trace/irq_vectors.h>
> >
> >  static void default_threshold_interrupt(void)
> >  {
> > @@ -27,3 +28,16 @@ asmlinkage void smp_threshold_interrupt(void)
> >     /* Ack only at the end to avoid potential reentry */
> >     ack_APIC_irq();
> >  }
> > +
> > +asmlinkage void smp_trace_threshold_interrupt(void)
> > +{
> > +   irq_enter();
> > +   exit_idle();
> > +   trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
> > +   inc_irq_stat(irq_threshold_count);
> > +   mce_threshold_vector();
> > +   trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
> > +   irq_exit();
> > +   /* Ack only at the end to avoid potential reentry */
> > +   ack_APIC_irq();
> > +}
> > diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
> > index 8f3e2de..2cfbc3a 100644
> > --- a/arch/x86/kernel/entry_32.S
> > +++ b/arch/x86/kernel/entry_32.S
> > @@ -801,7 +801,17 @@ ENTRY(name)                            \
> >     CFI_ENDPROC;                    \
> >  ENDPROC(name)
> >
> > -#define BUILD_INTERRUPT(name, nr)  BUILD_INTERRUPT3(name, nr, smp_##name)
> > +
> > +#ifdef CONFIG_TRACING
> > +#define TRACE_BUILD_INTERRUPT(name, nr)            \
> > +   BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
> > +#else
> > +#define TRACE_BUILD_INTERRUPT(name, nr)
> > +#endif
> > +
> > +#define BUILD_INTERRUPT(name, nr) \
> > +   BUILD_INTERRUPT3(name, nr, smp_##name); \
> > +   TRACE_BUILD_INTERRUPT(name, nr)
> >
> >  /* The include is where all of the SMP etc. interrupts come from */
> >  #include <asm/entry_arch.h>
> > diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> > index c1d01e6..4570477 100644
> > --- a/arch/x86/kernel/entry_64.S
> > +++ b/arch/x86/kernel/entry_64.S
> > @@ -1138,7 +1138,7 @@ END(common_interrupt)
> >  /*
> >   * APIC interrupts.
> >   */
> > -.macro apicinterrupt num sym do_sym
> > +.macro apicinterrupt3 num sym do_sym
> >  ENTRY(\sym)
> >     INTR_FRAME
> >     ASM_CLAC
> > @@ -1150,15 +1150,32 @@ ENTRY(\sym)
> >  END(\sym)
> >  .endm
> >
> > +#ifdef CONFIG_TRACING
> > +#define trace(sym) trace_##sym
> > +#define smp_trace(sym) smp_trace_##sym
> > +
> > +.macro trace_apicinterrupt num sym
> > +apicinterrupt3 \num trace(\sym) smp_trace(\sym)
> > +.endm
> > +#else
> > +.macro trace_apicinterrupt num sym do_sym
> > +.endm
> > +#endif
> > +
> > +.macro apicinterrupt num sym do_sym
> > +apicinterrupt3 \num \sym \do_sym
> > +trace_apicinterrupt \num \sym
> > +.endm
> > +
> >  #ifdef CONFIG_SMP
> > -apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
> > +apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \
> >     irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
> > -apicinterrupt REBOOT_VECTOR \
> > +apicinterrupt3 REBOOT_VECTOR \
> >     reboot_interrupt smp_reboot_interrupt
> >  #endif
> >
> >  #ifdef CONFIG_X86_UV
> > -apicinterrupt UV_BAU_MESSAGE \
> > +apicinterrupt3 UV_BAU_MESSAGE \
> >     uv_bau_message_intr1 uv_bau_message_interrupt
> >  #endif
> >  apicinterrupt LOCAL_TIMER_VECTOR \
> > @@ -1446,13 +1463,13 @@ ENTRY(xen_failsafe_callback)
> >     CFI_ENDPROC
> >  END(xen_failsafe_callback)
> >
> > -apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
> > +apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
> >     xen_hvm_callback_vector xen_evtchn_do_upcall
> >
> >  #endif /* CONFIG_XEN */
> >
> >  #if IS_ENABLED(CONFIG_HYPERV)
> > -apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
> > +apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
> >     hyperv_callback_vector hyperv_vector_handler
> >  #endif /* CONFIG_HYPERV */
> >
> > diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> > index 6859e96..e827e67 100644
> > --- a/arch/x86/kernel/head_64.S
> > +++ b/arch/x86/kernel/head_64.S
> > @@ -518,6 +518,12 @@ ENTRY(idt_table)
> >  ENTRY(nmi_idt_table)
> >     .skip IDT_ENTRIES * 16
> >
> > +#ifdef CONFIG_TRACING
> > +   .align L1_CACHE_BYTES
> > +ENTRY(trace_idt_table)
> > +   .skip IDT_ENTRIES * 16
> > +#endif
> > +
> >     __PAGE_ALIGNED_BSS
> >  NEXT_PAGE(empty_zero_page)
> >     .skip PAGE_SIZE
> > diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
> > index e4595f1..216bec1 100644
> > --- a/arch/x86/kernel/irq.c
> > +++ b/arch/x86/kernel/irq.c
> > @@ -17,6 +17,7 @@
> >  #include <asm/idle.h>
> >  #include <asm/mce.h>
> >  #include <asm/hw_irq.h>
> > +#include <asm/trace/irq_vectors.h>
> >
> >  atomic_t irq_err_count;
> >
> > @@ -228,6 +229,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
> >     set_irq_regs(old_regs);
> >  }
> >
> > +void smp_trace_x86_platform_ipi(struct pt_regs *regs)
> > +{
> > +   struct pt_regs *old_regs = set_irq_regs(regs);
> > +
> > +   ack_APIC_irq();
> > +
> > +   irq_enter();
> > +
> > +   exit_idle();
> > +
> > +   trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
> > +   inc_irq_stat(x86_platform_ipis);
> > +
> > +   if (x86_platform_ipi_callback)
> > +           x86_platform_ipi_callback();
> > +
> > +   trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
> > +   irq_exit();
> > +
> > +   set_irq_regs(old_regs);
> > +}
> > +
> >  EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
> >
> >  #ifdef CONFIG_HOTPLUG_CPU
> > diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
> > index ca8f703..09e6262 100644
> > --- a/arch/x86/kernel/irq_work.c
> > +++ b/arch/x86/kernel/irq_work.c
> > @@ -8,6 +8,7 @@
> >  #include <linux/irq_work.h>
> >  #include <linux/hardirq.h>
> >  #include <asm/apic.h>
> > +#include <asm/trace/irq_vectors.h>
> >
> >  void smp_irq_work_interrupt(struct pt_regs *regs)
> >  {
> > @@ -18,6 +19,17 @@ void smp_irq_work_interrupt(struct pt_regs *regs)
> >     irq_exit();
> >  }
> >
> > +void smp_trace_irq_work_interrupt(struct pt_regs *regs)
> > +{
> > +   irq_enter();
> > +   ack_APIC_irq();
> > +   trace_irq_work_entry(IRQ_WORK_VECTOR);
> > +   inc_irq_stat(apic_irq_work_irqs);
> > +   irq_work_run();
> > +   trace_irq_work_exit(IRQ_WORK_VECTOR);
> > +   irq_exit();
> > +}
> > +
> >  void arch_irq_work_raise(void)
> >  {
> >  #ifdef CONFIG_X86_LOCAL_APIC
> > diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
> > index 48d2b7d..aad58af 100644
> > --- a/arch/x86/kernel/smp.c
> > +++ b/arch/x86/kernel/smp.c
> > @@ -30,6 +30,7 @@
> >  #include <asm/proto.h>
> >  #include <asm/apic.h>
> >  #include <asm/nmi.h>
> > +#include <asm/trace/irq_vectors.h>
> >  /*
> >   * Some notes on x86 processor bugs affecting SMP operation:
> >   *
> > @@ -259,6 +260,18 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
> >      */
> >  }
> >
> > +void smp_trace_reschedule_interrupt(struct pt_regs *regs)
> > +{
> > +   ack_APIC_irq();
> > +   trace_reschedule_entry(RESCHEDULE_VECTOR);
> > +   inc_irq_stat(irq_resched_count);
> > +   scheduler_ipi();
> > +   trace_reschedule_exit(RESCHEDULE_VECTOR);
> > +   /*
> > +    * KVM uses this interrupt to force a cpu out of guest mode
> > +    */
> > +}
> > +
> >  void smp_call_function_interrupt(struct pt_regs *regs)
> >  {
> >     ack_APIC_irq();
> > @@ -268,6 +281,17 @@ void smp_call_function_interrupt(struct pt_regs *regs)
> >     irq_exit();
> >  }
> >
> > +void smp_trace_call_function_interrupt(struct pt_regs *regs)
> > +{
> > +   ack_APIC_irq();
> > +   irq_enter();
> > +   trace_call_function_entry(CALL_FUNCTION_VECTOR);
> > +   generic_smp_call_function_interrupt();
> > +   inc_irq_stat(irq_call_count);
> > +   trace_call_function_exit(CALL_FUNCTION_VECTOR);
> > +   irq_exit();
> > +}
> > +
> >  void smp_call_function_single_interrupt(struct pt_regs *regs)
> >  {
> >     ack_APIC_irq();
> > @@ -277,6 +301,17 @@ void smp_call_function_single_interrupt(struct pt_regs 
> > *regs)
> >     irq_exit();
> >  }
> >
> > +void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
> > +{
> > +   ack_APIC_irq();
> > +   irq_enter();
> > +   trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
> > +   generic_smp_call_function_single_interrupt();
> > +   inc_irq_stat(irq_call_count);
> > +   trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
> > +   irq_exit();
> > +}
> > +
> >  static int __init nonmi_ipi_setup(char *str)
> >  {
> >     smp_no_nmi_ipi = true;
> > diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
> > new file mode 100644
> > index 0000000..3403dc3
> > --- /dev/null
> > +++ b/arch/x86/kernel/tracepoint.c
> > @@ -0,0 +1,71 @@
> > +/*
> > + * Code for supporting irq vector tracepoints.
> > + *
> > + * Copyright (C) 2013 Seiji Aguchi <[email protected]>
> > + *
> > + */
> > +#include <asm/hw_irq.h>
> > +#include <asm/desc.h>
> > +
> > +struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
> > +                           (unsigned long) trace_idt_table };
> > +
> > +#ifndef CONFIG_X86_64
> > +gate_desc trace_idt_table[NR_VECTORS] __page_aligned_data
> > +                                   = { { { { 0, 0 } } }, };
> > +#endif
> > +
> > +static int trace_irq_vector_refcount;
> > +static DEFINE_MUTEX(irq_vector_mutex);
> > +DEFINE_PER_CPU(u32, trace_idt_in_use);
> > +
> > +static void switch_to_trace_idt(void *arg)
> > +{
> > +
> > +   this_cpu_inc(trace_idt_in_use);
> > +   load_idt(&trace_idt_descr);
> > +
> > +   return;
> > +}
> > +
> > +static void restore_original_idt(void *arg)
> > +{
> > +   if (WARN_ON(!this_cpu_read(trace_idt_in_use)))
> > +           return;
> > +   this_cpu_dec(trace_idt_in_use);
> > +
> > +#ifdef CONFIG_X86_64
> > +   if (this_cpu_read(debug_stack_use_ctr))
> > +           load_idt(&nmi_idt_descr);
> > +   else
> > +#endif
> 
> This is unneeded, as debug_stack_use_ctr should never be set here. It
> only gets set when interrupts are disabled and cleared before they are
> enabled again, and this is called from interrupt context. If it is set,
> then it is truly a bug.
> 
> Only NMIs themselves can see this set, if it wasn't the one to set it.
> 
> > +           load_idt(&idt_descr);
> > +   return;
> > +}
> > +
> > +void trace_irq_vector_regfunc(void)
> > +{
> > +   mutex_lock(&irq_vector_mutex);
> > +   if (!trace_irq_vector_refcount) {
> > +           smp_call_function(switch_to_trace_idt, NULL, 0);
> > +           local_irq_disable();
> > +           switch_to_trace_idt(NULL);
> > +           local_irq_enable();
> > +   }
> > +   trace_irq_vector_refcount++;
> > +   mutex_unlock(&irq_vector_mutex);
> > +}
> > +
> > +void trace_irq_vector_unregfunc(void)
> > +{
> > +   mutex_lock(&irq_vector_mutex);
> > +   trace_irq_vector_refcount--;
> > +   if (!trace_irq_vector_refcount) {
> 
> Hmm, this needs to handle cpu hotplug. I just did the following:
> 
> echo 0 > /sys/devices/system/cpu/cpu3/online
> echo 1 > /debug/tracing/events/irq_vectors/enable
> echo 1 > /sys/devices/systems/cpu/cpu3/online
> echo 0 > /debug/tracing/events/irq/vectors/enable
> 
> and got this:
> 
> [  224.595931] ------------[ cut here ]------------
> [  224.596928] WARNING: at 
> /home/rostedt/work/git/linux-trace.git/arch/x86/kernel/tracepoint.c:33 
> restore_original_idt+0x26/0x4e()
> [  224.596928] Modules linked in: ebtables ipt_MASQUERADE sunrpc bridge stp 
> llc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6
> ip6table_filter ip6_tables ipv6 uinput snd_hda_codec_idt snd_hda_intel 
> snd_hda_codec kvm_intel snd_hwdep snd_seq kvm
> snd_seq_device snd_pcm snd_timer snd soundcore snd_page_alloc shpchp 
> microcode i2c_i801 pata_acpi firewire_ohci firewire_core
> crc_itu_t ata_generic i915 drm_kms_helper drm i2c_algo_bit i2c_core video
> [  224.596928] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 3.10.0-rc2-test+ #137
> [  224.596928] Hardware name: To Be Filled By O.E.M. To Be Filled By 
> O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
> [  224.596928]  ffffffff817c948b ffff88007d583ef8 ffffffff814c9ec6 
> ffff88007d583f38
> [  224.596928]  ffffffff810363a5 ffff88007d5971b8 0000000000000000 
> ffff88007d5971b8
> [  224.596928]  ffff88007d583f68 0000000000000001 0000000000000000 
> ffff88007d583f48
> [  224.596928] Call Trace:
> [  224.596928]  <IRQ>  [<ffffffff814c9ec6>] dump_stack+0x19/0x1b
> [  224.596928]  [<ffffffff810363a5>] warn_slowpath_common+0x67/0x80
> [  224.596928]  [<ffffffff810363d8>] warn_slowpath_null+0x1a/0x1c
> [  224.596928]  [<ffffffff8102864c>] restore_original_idt+0x26/0x4e
> [  224.596928]  [<ffffffff8108bd52>] 
> generic_smp_call_function_single_interrupt+0xb5/0xee
> [  224.596928]  [<ffffffff8101f500>] smp_call_function_interrupt+0x18/0x27
> [  224.596928]  [<ffffffff814d646f>] call_function_interrupt+0x6f/0x80
> [  224.596928]  <EOI>  [<ffffffff81009727>] ? default_idle+0x21/0x32
> [  224.596928]  [<ffffffff81009725>] ? default_idle+0x1f/0x32
> [  224.596928]  [<ffffffff81009e37>] arch_cpu_idle+0x18/0x22
> [  224.596928]  [<ffffffff810799da>] cpu_startup_entry+0xc8/0x12e
> [  224.596928]  [<ffffffff814bc6ad>] start_secondary+0x250/0x257
> 
> -- Steve
> 
> 
> > +           smp_call_function(restore_original_idt, NULL, 0);
> > +           local_irq_disable();
> > +           restore_original_idt(NULL);
> > +           local_irq_enable();
> > +   }
> > +   mutex_unlock(&irq_vector_mutex);
> > +}
> > +
> > diff --git a/include/xen/events.h b/include/xen/events.h
> > index c6bfe01..9216d07 100644
> > --- a/include/xen/events.h
> > +++ b/include/xen/events.h
> > @@ -76,6 +76,9 @@ unsigned irq_from_evtchn(unsigned int evtchn);
> >
> >  /* Xen HVM evtchn vector callback */
> >  void xen_hvm_callback_vector(void);
> > +#ifdef CONFIG_TRACING
> > +#define trace_xen_hvm_callback_vector xen_hvm_callback_vector
> > +#endif
> >  extern int xen_have_vector_callback;
> >  int xen_set_callback_via(uint64_t via);
> >  void xen_evtchn_do_upcall(struct pt_regs *regs);
> 

Reply via email to