Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation
Steven Rostedt wrote: > Index: linux-compile-i386.git/Makefile > === > --- linux-compile-i386.git.orig/Makefile 2008-01-09 14:09:36.0 > -0500 > +++ linux-compile-i386.git/Makefile 2008-01-09 14:10:07.0 -0500 > @@ -509,6 +509,10 @@ endif > > include $(srctree)/arch/$(SRCARCH)/Makefile > > +# MCOUNT expects frame pointer This comment looks stray. > +ifdef CONFIG_MCOUNT > +KBUILD_CFLAGS+= -pg > +endif > ifdef CONFIG_FRAME_POINTER > KBUILD_CFLAGS+= -fno-omit-frame-pointer -fno-optimize-sibling-calls > else > Index: linux-compile-i386.git/arch/x86/Kconfig > === > --- linux-compile-i386.git.orig/arch/x86/Kconfig 2008-01-09 > 14:09:36.0 -0500 > +++ linux-compile-i386.git/arch/x86/Kconfig 2008-01-09 14:10:07.0 > -0500 > @@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE > bool > default y > > +config ARCH_HAS_MCOUNT > + bool > + default y > + > config CLOCKSOURCE_WATCHDOG > bool > default y > Index: linux-compile-i386.git/arch/x86/kernel/Makefile_32 > === > --- linux-compile-i386.git.orig/arch/x86/kernel/Makefile_32 2008-01-09 > 14:09:36.0 -0500 > +++ linux-compile-i386.git/arch/x86/kernel/Makefile_322008-01-09 > 14:10:07.0 -0500 > @@ -23,6 +23,7 @@ obj-$(CONFIG_APM) += apm_32.o > obj-$(CONFIG_X86_SMP)+= smp_32.o smpboot_32.o tsc_sync.o > obj-$(CONFIG_SMP)+= smpcommon_32.o > obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_32.o > +obj-$(CONFIG_MCOUNT) += mcount-wrapper.o So far the code organization is different for 32 and 64 bit. I would suggest to either o move both trampolines into entry_*.S or o put them in something like mcount-wrapper_32/64.S. > obj-$(CONFIG_X86_MPPARSE)+= mpparse_32.o > obj-$(CONFIG_X86_LOCAL_APIC) += apic_32.o nmi_32.o > obj-$(CONFIG_X86_IO_APIC)+= io_apic_32.o > Index: linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S > === > --- /dev/null 1970-01-01 00:00:00.0 + > +++ linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S 2008-01-09 > 14:10:07.0 -0500 > @@ -0,0 +1,25 @@ > +/* > + * linux/arch/x86/mcount-wrapper.S > + * > + * Copyright (C) 2004 Ingo Molnar > + */ > + > +.globl mcount > +mcount: > + cmpl $0, mcount_enabled > + jz out > + > + push %ebp > + mov %esp, %ebp What is the benefit of having a call frame in this trampoline? We used to carry this in the i386 mcount tracer for Adeos/I-pipe too (it was derived from the -rt code), but I just successfully tested a removal patch. Also glibc [1] doesn't include it. > + pushl %eax > + pushl %ecx > + pushl %edx > + > + call __mcount I think this indirection should be avoided, just like the 64-bit version and glibc do. > + > + popl %edx > + popl %ecx > + popl %eax > + popl %ebp > +out: > + ret ... > Index: linux-compile-i386.git/lib/tracing/mcount.c > === > --- /dev/null 1970-01-01 00:00:00.0 + > +++ linux-compile-i386.git/lib/tracing/mcount.c 2008-01-09 > 14:10:07.0 -0500 > @@ -0,0 +1,77 @@ > +/* > + * Infrastructure for profiling code inserted by 'gcc -pg'. > + * > + * Copyright (C) 2007 Arnaldo Carvalho de Melo <[EMAIL PROTECTED]> > + * > + * Converted to be more generic: > + * Copyright (C) 2007-2008 Steven Rostedt <[EMAIL PROTECTED]> > + * > + * From code in the latency_tracer, that is: > + * > + * Copyright (C) 2004-2006 Ingo Molnar > + * Copyright (C) 2004 William Lee Irwin III > + */ > + > +#include > +#include > + > +/* > + * Since we have nothing protecting between the test of > + * mcount_trace_function and the call to it, we can't > + * set it to NULL without risking a race that will have > + * the kernel call the NULL pointer. Instead, we just > + * set the function pointer to a dummy function. > + */ > +notrace void dummy_mcount_tracer(unsigned long ip, > + unsigned long parent_ip) > +{ > + /* do nothing */ > +} > + > +mcount_func_t mcount_trace_function __read_mostly = dummy_mcount_tracer; > +int mcount_enabled __read_mostly; > + > +/** __mcount - hook for profiling > + * > + * This routine is called from the arch specific mcount routine, that in > turn is > + * called from code inserted by gcc -pg. > + */ > +notrace void __mcount(void) > +{ > + mcount_trace_function(CALLER_ADDR1, CALLER_ADDR2); > +} mcount_trace_function should always be called from the assembly trampoline, IMO. > +EXPORT_SYMBOL_GPL(mcount); > +/* > + * The above EXPORT_SYMBOL is for the gcc call of mcount and not the > + * function __mcount that it is underneath. I put the export there > + *
Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation
On Thu, 10 Jan 2008, Jan Kiszka wrote: > > === > > --- /dev/null 1970-01-01 00:00:00.0 + > > +++ linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S 2008-01-09 > > 14:10:07.0 -0500 > > @@ -0,0 +1,25 @@ > > +/* > > + * linux/arch/x86/mcount-wrapper.S > > + * > > + * Copyright (C) 2004 Ingo Molnar > > + */ > > + > > +.globl mcount > > +mcount: > > + cmpl $0, mcount_enabled > > + jz out > > + > > + push %ebp > > + mov %esp, %ebp > > What is the benefit of having a call frame in this trampoline? We used > to carry this in the i386 mcount tracer for Adeos/I-pipe too (it was > derived from the -rt code), but I just successfully tested a removal > patch. Also glibc [1] doesn't include it. OK, I just tried this out on i386, and it works fine. > > > + pushl %eax > > + pushl %ecx > > + pushl %edx > > + > > + call __mcount > > I think this indirection should be avoided, just like the 64-bit version > and glibc do. I also did this too. > > > + > > + popl %edx > > + popl %ecx > > + popl %eax > > + popl %ebp > > +out: > > + ret > I'll go try the updates on x86_64 now. Thanks for the tips! -- Steve -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation
On Thu, 10 Jan 2008, Jan Kiszka wrote: > Steven Rostedt wrote: > > Index: linux-compile-i386.git/Makefile > > === > > --- linux-compile-i386.git.orig/Makefile2008-01-09 14:09:36.0 > > -0500 > > +++ linux-compile-i386.git/Makefile 2008-01-09 14:10:07.0 -0500 > > @@ -509,6 +509,10 @@ endif > > > > include $(srctree)/arch/$(SRCARCH)/Makefile > > > > +# MCOUNT expects frame pointer > > This comment looks stray. Actually it's not ;-) The original code had something like this: #if CONFIG_MCOUNT KBUILD_CFLAGS += ... #else #if CONFIG_FRAME_POINTER KBUILD_CFLAGS += ... #else KBUILD_CFLAGS += ... #endif #endif And Sam Ravnborg suggested to put that logic into the Kbuild system. For which I did, but I put that comment there to just let others know that MCOUNT expects the flags of FRAME_POINTER. But, I guess we can nuke that comment anyway. It just leads to confusion. > > > +ifdef CONFIG_MCOUNT > > +KBUILD_CFLAGS += -pg > > +endif > > ifdef CONFIG_FRAME_POINTER > > KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls > > else > > Index: linux-compile-i386.git/arch/x86/Kconfig > > === > > --- linux-compile-i386.git.orig/arch/x86/Kconfig2008-01-09 > > 14:09:36.0 -0500 > > +++ linux-compile-i386.git/arch/x86/Kconfig 2008-01-09 14:10:07.0 > > -0500 > > @@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE > > bool > > default y > > > > +config ARCH_HAS_MCOUNT > > + bool > > + default y > > + > > config CLOCKSOURCE_WATCHDOG > > bool > > default y > > Index: linux-compile-i386.git/arch/x86/kernel/Makefile_32 > > === > > --- linux-compile-i386.git.orig/arch/x86/kernel/Makefile_32 2008-01-09 > > 14:09:36.0 -0500 > > +++ linux-compile-i386.git/arch/x86/kernel/Makefile_32 2008-01-09 > > 14:10:07.0 -0500 > > @@ -23,6 +23,7 @@ obj-$(CONFIG_APM) += apm_32.o > > obj-$(CONFIG_X86_SMP) += smp_32.o smpboot_32.o tsc_sync.o > > obj-$(CONFIG_SMP) += smpcommon_32.o > > obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_32.o > > +obj-$(CONFIG_MCOUNT) += mcount-wrapper.o > > So far the code organization is different for 32 and 64 bit. I would > suggest to either > > o move both trampolines into entry_*.S or > o put them in something like mcount-wrapper_32/64.S. Yeah, that's a relic from -rt. I never liked that, but I was just too lazy to change it. I think I'll move the mcount_wrapper into entry_32.S > > > obj-$(CONFIG_X86_MPPARSE) += mpparse_32.o > > obj-$(CONFIG_X86_LOCAL_APIC) += apic_32.o nmi_32.o > > obj-$(CONFIG_X86_IO_APIC) += io_apic_32.o > > Index: linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S > > === > > --- /dev/null 1970-01-01 00:00:00.0 + > > +++ linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S 2008-01-09 > > 14:10:07.0 -0500 > > @@ -0,0 +1,25 @@ > > +/* > > + * linux/arch/x86/mcount-wrapper.S > > + * > > + * Copyright (C) 2004 Ingo Molnar > > + */ > > + > > +.globl mcount > > +mcount: > > + cmpl $0, mcount_enabled > > + jz out > > + > > + push %ebp > > + mov %esp, %ebp > > What is the benefit of having a call frame in this trampoline? We used > to carry this in the i386 mcount tracer for Adeos/I-pipe too (it was > derived from the -rt code), but I just successfully tested a removal > patch. Also glibc [1] doesn't include it. Hmm, what about having frame pointers on? Isn't that a requirement? > > > + pushl %eax > > + pushl %ecx > > + pushl %edx > > + > > + call __mcount > > I think this indirection should be avoided, just like the 64-bit version > and glibc do. I thought about that too, but didn't have the time to look into the calling convention for that. # objdump --start-address 0x`nm /lib/libc-2.7.so | sed -ne '/ mcount$/s/^\([0-9a-f]*\).*/\1/p'` -D /lib/libc-2.7.so |head -28 |tail -12 49201cd0 <_mcount>: 49201cd0: 50 push %eax 49201cd1: 51 push %ecx 49201cd2: 52 push %edx 49201cd3: 8b 54 24 0c mov0xc(%esp),%edx 49201cd7: 8b 45 04mov0x4(%ebp),%eax 49201cda: e8 91 f4 ff ff call 49201170 <__mcount_internal> 49201cdf: 5a pop%edx 49201ce0: 59 pop%ecx 49201ce1: 58 pop%eax 49201ce2: c3 ret 49201ce3: 90 nop Until I found out about the frame pointers, I'll leave in the ebp copy. > > > + > > + popl %edx > > + popl %ecx > > + popl %eax > > + popl %ebp > > +out: > > + ret > > [...] > > +/** __mcount - hook for profiling > >
Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation
Hi Sam, On Thu, 10 Jan 2008, Sam Ravnborg wrote: > Hi Steven. > > > Index: linux-compile-i386.git/arch/x86/Kconfig > > === > > --- linux-compile-i386.git.orig/arch/x86/Kconfig2008-01-09 > > 14:09:36.0 -0500 > > +++ linux-compile-i386.git/arch/x86/Kconfig 2008-01-09 14:10:07.0 > > -0500 > > @@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE > > bool > > default y > > > > +config ARCH_HAS_MCOUNT > > + bool > > + default y > > + > > Please use the following scheme: > > arch/x86/Kconfig: > config X86 > + select HAVE_MCOUNT > > lib/tracing/Kconfig > > + # ARCH shall select HAVE_MCOUNT if they provide this function > + config HAVE_MCOUNT > + bool > + > + config MCOUNT > + bool > + select FRAME_POINTER > > And then in your later patches: > +config MCOUNT_TRACER > + bool "Profiler instrumentation based tracer" > + depends on DEBUG_KERNEL && HAVE_MCOUNT > + select MCOUNT > + help > + Use profiler Thanks, this does look like a cleaner approach. I'll implement it into my next series. -- Steve -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation
Hi Steven. > Index: linux-compile-i386.git/arch/x86/Kconfig > === > --- linux-compile-i386.git.orig/arch/x86/Kconfig 2008-01-09 > 14:09:36.0 -0500 > +++ linux-compile-i386.git/arch/x86/Kconfig 2008-01-09 14:10:07.0 > -0500 > @@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE > bool > default y > > +config ARCH_HAS_MCOUNT > + bool > + default y > + Please use the following scheme: arch/x86/Kconfig: config X86 + select HAVE_MCOUNT lib/tracing/Kconfig + # ARCH shall select HAVE_MCOUNT if they provide this function + config HAVE_MCOUNT + bool + + config MCOUNT + bool + select FRAME_POINTER And then in your later patches: +config MCOUNT_TRACER + bool "Profiler instrumentation based tracer" + depends on DEBUG_KERNEL && HAVE_MCOUNT + select MCOUNT + help + Use profiler The "default n" is a noop since this is the default. And note that the depends on is removed from MCOUNT because you use it as a select target (so dependencies are not checked anyway). With this scheme implmented you: - Use new naming convention (HAVE_*) - Avoid defining one config variable per arch - Do not have dependencies on selected symbols - More compact representation in arch Kconfig files Sam -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC PATCH 01/22 -v2] Add basic support for gcc profiler instrumentation
If CONFIG_MCOUNT is selected and /proc/sys/kernel/mcount_enabled is set to a non-zero value the mcount routine will be called everytime we enter a kernel function that is not marked with the "notrace" attribute. The mcount routine will then call a registered function if a function happens to be registered. [This code has been highly hacked by Steven Rostedt, so don't blame Arnaldo for all of this ;-) ] Signed-off-by: Arnaldo Carvalho de Melo <[EMAIL PROTECTED]> Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]> --- Makefile |4 ++ arch/x86/Kconfig |4 ++ arch/x86/kernel/Makefile_32 |1 arch/x86/kernel/entry_64.S | 40 arch/x86/kernel/mcount-wrapper.S | 25 include/linux/linkage.h |2 + include/linux/mcount.h | 21 ++ kernel/sysctl.c | 11 + lib/Kconfig.debug|2 + lib/Makefile |2 + lib/tracing/Kconfig |7 +++ lib/tracing/Makefile |3 + lib/tracing/mcount.c | 77 +++ 13 files changed, 199 insertions(+) create mode 100644 arch/i386/kernel/mcount-wrapper.S create mode 100644 lib/tracing/Kconfig create mode 100644 lib/tracing/Makefile create mode 100644 lib/tracing/mcount.c create mode 100644 lib/tracing/mcount.h Index: linux-compile-i386.git/Makefile === --- linux-compile-i386.git.orig/Makefile2008-01-09 14:09:36.0 -0500 +++ linux-compile-i386.git/Makefile 2008-01-09 14:10:07.0 -0500 @@ -509,6 +509,10 @@ endif include $(srctree)/arch/$(SRCARCH)/Makefile +# MCOUNT expects frame pointer +ifdef CONFIG_MCOUNT +KBUILD_CFLAGS += -pg +endif ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls else Index: linux-compile-i386.git/arch/x86/Kconfig === --- linux-compile-i386.git.orig/arch/x86/Kconfig2008-01-09 14:09:36.0 -0500 +++ linux-compile-i386.git/arch/x86/Kconfig 2008-01-09 14:10:07.0 -0500 @@ -28,6 +28,10 @@ config GENERIC_CMOS_UPDATE bool default y +config ARCH_HAS_MCOUNT + bool + default y + config CLOCKSOURCE_WATCHDOG bool default y Index: linux-compile-i386.git/arch/x86/kernel/Makefile_32 === --- linux-compile-i386.git.orig/arch/x86/kernel/Makefile_32 2008-01-09 14:09:36.0 -0500 +++ linux-compile-i386.git/arch/x86/kernel/Makefile_32 2008-01-09 14:10:07.0 -0500 @@ -23,6 +23,7 @@ obj-$(CONFIG_APM) += apm_32.o obj-$(CONFIG_X86_SMP) += smp_32.o smpboot_32.o tsc_sync.o obj-$(CONFIG_SMP) += smpcommon_32.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_32.o +obj-$(CONFIG_MCOUNT) += mcount-wrapper.o obj-$(CONFIG_X86_MPPARSE) += mpparse_32.o obj-$(CONFIG_X86_LOCAL_APIC) += apic_32.o nmi_32.o obj-$(CONFIG_X86_IO_APIC) += io_apic_32.o Index: linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S === --- /dev/null 1970-01-01 00:00:00.0 + +++ linux-compile-i386.git/arch/x86/kernel/mcount-wrapper.S 2008-01-09 14:10:07.0 -0500 @@ -0,0 +1,25 @@ +/* + * linux/arch/x86/mcount-wrapper.S + * + * Copyright (C) 2004 Ingo Molnar + */ + +.globl mcount +mcount: + cmpl $0, mcount_enabled + jz out + + push %ebp + mov %esp, %ebp + pushl %eax + pushl %ecx + pushl %edx + + call __mcount + + popl %edx + popl %ecx + popl %eax + popl %ebp +out: + ret Index: linux-compile-i386.git/include/linux/linkage.h === --- linux-compile-i386.git.orig/include/linux/linkage.h 2008-01-09 14:09:36.0 -0500 +++ linux-compile-i386.git/include/linux/linkage.h 2008-01-09 14:10:07.0 -0500 @@ -3,6 +3,8 @@ #include +#define notrace __attribute__((no_instrument_function)) + #ifdef __cplusplus #define CPP_ASMLINKAGE extern "C" #else Index: linux-compile-i386.git/kernel/sysctl.c === --- linux-compile-i386.git.orig/kernel/sysctl.c 2008-01-09 14:09:36.0 -0500 +++ linux-compile-i386.git/kernel/sysctl.c 2008-01-09 14:10:07.0 -0500 @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -470,6 +471,16 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_MCOUNT + { + .ctl_name = CTL_UNNUMBERED, + .procname = "mcount_enabled", +