On Tue, 20 Mar 2018 14:22:56 -0400
Abderrahmane Benbachir <abderrahmane.benbac...@polymtl.ca> wrote:

> Hi Steve.
> 
> Initcall's tracing support have been recently added to ftrace, which enables 
> detecting latencies within each initcall function. The main concern here is,
> enabling initcall's tracing (which is fine-grained) will add a lots of 
> overhead
> during boot-up. So with boot level tracing we can get the overall picture, 
> with
> low overhead, then we can use function tracing with filters to dig into more
> specific path. 
> 
> This is an example of boot level sequence, with the first initcall and the 
> last
> initcall that were retrieved dynamically at runtime:

Hmm, we had something like this when ftrace was first introduced:

 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d13744cd6e3fef373a3fe656ac349b4e7c49ff79

But was removed because it appeared to be useless...

 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=30dbb20e68e6f7df974b77d2350ebad5eb6f6c9e

> 
> 0,console,  first=con_init,                   last=univ8250_console_init
> 1,security, first=selinux_init,                       
> last=integrity_iintcache_init
> 2,early,    first=trace_init_flags_sys_exit,  last=initialize_ptr_random
> 3,pure,     first=ipc_ns_init,                        last=net_ns_init
> 4,core,     first=xen_pvh_gnttab_setup,               last=__gnttab_init
> 5,postcore, first=irq_sysfs_init,             last=kobject_uevent_init
> 6,arch,     first=bts_init,                   last=pci_arch_init
> 7,subsys,   first=init_vdso,                  last=watchdog_init
> 8,fs,             first=nmi_warning_debugfs,          
> last=acpi_reserve_resources
> 9,rootfs,   first=populate_rootfs,            last=ir_dev_scope_init
> 10,device,  first=ia32_binfmt_init,           last=mcheck_init_device
> 11,late,    first=tboot_late_init,            last=regulator_init_complete
> 
> To make it properly work, the idea is to register at boot time function based 
> events on the first and the last initcall of each boot level.
> 
> The created events will be recycled when the last sequence is reached. 
> 
> This patch is only a proof of concept, I didn't use function based events 
> mechanism, I only changed the first and last initcalls to point to some
> handlers, which worked fine, but need to changed:
> +     (*first_initcall_fn) = trace_bootlevel_start_handler;
> +     (*last_initcall_fn) = trace_bootlevel_end_handler;
> 
> I made this patch to get some feedbacks, and to know if this could be an
> interresting feature to have in ftrace ?
> 

What about instead just adding trace_events to the start and stop of
init calls, and enable them with trace_event=boot during boot up.

Where the output would be:

       swapper/0-1     [000] ....     0.126563: initcall_level: 
level=pre_smp_initcalls
       swapper/0-1     [000] ....     0.126566: initcall_start: 
func=trace_init_flags_sys_exit+0x0/0x14
[..]
       swapper/0-1     [000] ....     0.178691: initcall_finish: 
func=initialize_ptr_random+0x0/0x3b ret=0
       swapper/0-1     [000] ....     0.307805: initcall_level: level=early
       swapper/0-1     [000] ....     0.307807: initcall_start: 
func=ipc_ns_init+0x0/0x5c
[..]
       swapper/0-1     [000] ....     0.307877: initcall_finish: 
func=net_ns_init+0x0/0x15e ret=0
       swapper/0-1     [000] ....     0.307908: initcall_level: level=core
       swapper/0-1     [000] ....     0.307909: initcall_start: 
func=e820__register_nvs_regions+0x0/0x3d
[..]
       swapper/0-1     [000] ....     0.358655: initcall_finish: 
func=__gnttab_init+0x0/0x30 ret=-19
       swapper/0-1     [000] ....     0.358686: initcall_level: level=postcore
       swapper/0-1     [000] ....     0.358686: initcall_start: 
func=irq_sysfs_init+0x0/0x9b
[..]
       swapper/0-1     [002] ....     0.382277: initcall_finish: 
func=kobject_uevent_init+0x0/0x12 ret=0
       swapper/0-1     [002] ....     0.382308: initcall_level: level=arch
       swapper/0-1     [002] ....     0.382309: initcall_start: 
func=bts_init+0x0/0xc2
[..]
       swapper/0-1     [002] ....     0.422657: initcall_finish: 
func=pci_arch_init+0x0/0x6b ret=0
       swapper/0-1     [002] ....     0.422688: initcall_level: level=subsys
       swapper/0-1     [002] ....     0.422688: initcall_start: 
func=init_vdso+0x0/0x3a
[..]
       swapper/0-1     [005] ....     0.995475: initcall_finish: 
func=watchdog_init+0x0/0x9f ret=0
       swapper/0-1     [005] ....     0.995509: initcall_level: level=fs
       swapper/0-1     [005] ....     0.995510: initcall_start: 
func=nmi_warning_debugfs+0x0/0x2c
[..]
       swapper/0-1     [001] ....     1.625378: initcall_finish: 
func=ir_dev_scope_init+0x0/0x38 ret=0
       swapper/0-1     [001] ....     1.625416: initcall_level: level=device
       swapper/0-1     [001] ....     1.625416: initcall_start: 
func=ia32_binfmt_init+0x0/0x19
[..]
       swapper/0-1     [003] ....     2.922618: initcall_finish: 
func=mcheck_init_device+0x0/0x13b ret=0
       swapper/0-1     [003] ....     2.922651: initcall_level: level=late
       swapper/0-1     [003] ....     2.922651: initcall_start: 
func=tboot_late_init+0x0/0x292
[..]
       swapper/0-1     [000] ....    13.501018: initcall_finish: 
func=pci_sysfs_init+0x0/0x5a ret=0

And you could even get what tests ran the longest. Like above, if you
are wondering about that 13 second timestamp, it's because I had my
ring buffer test enabled, which runs for some time:

       swapper/0-1     [003] ....     2.985688: initcall_start: 
func=test_ringbuffer+0x0/0x49c
       swapper/0-1     [000] ....    13.495516: initcall_finish: 
func=test_ringbuffer+0x0/0x49c ret=0

Would something like this work for you?

-- Steve

diff --git a/include/trace/events/boot.h b/include/trace/events/boot.h
new file mode 100644
index 000000000000..0a1d53a848ae
--- /dev/null
+++ b/include/trace/events/boot.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM boot
+
+#if !defined(_TRACE_BOOT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BOOT_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(initcall_level,
+
+       TP_PROTO(const char *level),
+
+       TP_ARGS(level),
+
+       TP_STRUCT__entry(
+               __string(level, level)
+       ),
+
+       TP_fast_assign(
+               __assign_str(level, level);
+       ),
+
+       TP_printk("level=%s", __get_str(level))
+);
+
+TRACE_EVENT(initcall_start,
+
+       TP_PROTO(initcall_t func),
+
+       TP_ARGS(func),
+
+       TP_STRUCT__entry(
+               __field(initcall_t, func)
+       ),
+
+       TP_fast_assign(
+               __entry->func = func;
+       ),
+
+       TP_printk("func=%pS", __entry->func)
+);
+
+TRACE_EVENT(initcall_finish,
+
+       TP_PROTO(initcall_t func, int ret),
+
+       TP_ARGS(func, ret),
+
+       TP_STRUCT__entry(
+               __field(initcall_t,     func)
+               __field(int,            ret)
+       ),
+
+       TP_fast_assign(
+               __entry->func = func;
+               __entry->ret = ret;
+       ),
+
+       TP_printk("func=%pS ret=%d", __entry->func, __entry->ret)
+);
+
+#endif /* if !defined(_TRACE_GPIO_H) || defined(TRACE_HEADER_MULTI_READ) */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/init/main.c b/init/main.c
index a8100b954839..498e08237569 100644
--- a/init/main.c
+++ b/init/main.c
@@ -96,6 +96,9 @@
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/boot.h>
+
 static int kernel_init(void *);
 
 extern void init_IRQ(void);
@@ -826,10 +829,12 @@ int __init_or_module do_one_initcall(initcall_t fn)
        if (initcall_blacklisted(fn))
                return -EPERM;
 
+       trace_initcall_start(fn);
        if (initcall_debug)
                ret = do_one_initcall_debug(fn);
        else
                ret = fn();
+       trace_initcall_finish(fn, ret);
 
        msgbuf[0] = 0;
 
@@ -894,6 +899,7 @@ static void __init do_initcall_level(int level)
                   level, level,
                   NULL, &repair_env_string);
 
+       trace_initcall_level(initcall_level_names[level]);
        for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
                do_one_initcall(*fn);
 }
@@ -928,6 +934,7 @@ static void __init do_pre_smp_initcalls(void)
 {
        initcall_t *fn;
 
+       trace_initcall_level("pre_smp_initcalls");
        for (fn = __initcall_start; fn < __initcall0_start; fn++)
                do_one_initcall(*fn);
 }

Reply via email to