[tip: locking/core] jump_label: Do not profile branch annotations
The following commit has been merged into the locking/core branch of tip: Commit-ID: 2f0df49c89acaa58571d509830bc481250699885 Gitweb: https://git.kernel.org/tip/2f0df49c89acaa58571d509830bc481250699885 Author:Steven Rostedt (VMware) AuthorDate:Fri, 11 Dec 2020 16:37:54 -05:00 Committer: Peter Zijlstra CommitterDate: Fri, 22 Jan 2021 11:08:56 +01:00 jump_label: Do not profile branch annotations While running my branch profiler that checks for incorrect "likely" and "unlikely"s around the kernel, there's a large number of them that are incorrect due to being "static_branches". As static_branches are rather special, as they are likely or unlikely for other reasons than normal annotations are used for, there's no reason to have them be profiled. Expose the "unlikely_notrace" and "likely_notrace" so that the static_branch can use them, and have them be ignored by the branch profilers. Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201211163754.58517...@gandalf.local.home --- include/linux/compiler.h | 2 ++ include/linux/jump_label.h | 12 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index b8fe0c2..df5b405 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -76,6 +76,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #else # define likely(x) __builtin_expect(!!(x), 1) # define unlikely(x) __builtin_expect(!!(x), 0) +# define likely_notrace(x) likely(x) +# define unlikely_notrace(x) unlikely(x) #endif /* Optimization barrier */ diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 3280962..d926912 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -261,14 +261,14 @@ static __always_inline void jump_label_init(void) static __always_inline bool static_key_false(struct static_key *key) { - if (unlikely(static_key_count(key) > 0)) + if (unlikely_notrace(static_key_count(key) > 0)) return true; return false; } static __always_inline bool static_key_true(struct static_key *key) { - if (likely(static_key_count(key) > 0)) + if (likely_notrace(static_key_count(key) > 0)) return true; return false; } @@ -460,7 +460,7 @@ extern bool wrong_branch_error(void); branch = !arch_static_branch_jump(&(x)->key, true); \ else \ branch = wrong_branch_error(); \ - likely(branch); \ + likely_notrace(branch); \ }) #define static_branch_unlikely(x) \ @@ -472,13 +472,13 @@ extern bool wrong_branch_error(void); branch = arch_static_branch(&(x)->key, false); \ else \ branch = wrong_branch_error(); \ - unlikely(branch); \ + unlikely_notrace(branch); \ }) #else /* !CONFIG_JUMP_LABEL */ -#define static_branch_likely(x) likely(static_key_enabled(&(x)->key)) -#define static_branch_unlikely(x) unlikely(static_key_enabled(&(x)->key)) +#define static_branch_likely(x) likely_notrace(static_key_enabled(&(x)->key)) +#define static_branch_unlikely(x) unlikely_notrace(static_key_enabled(&(x)->key)) #endif /* CONFIG_JUMP_LABEL */
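
For context, a minimal sketch of a static-key branch site as it typically appears in kernel code; the key and function names here are hypothetical and not taken from the patch. The point of the change is that such sites now expand to likely_notrace()/unlikely_notrace(), so they no longer pollute the CONFIG_PROFILE_ANNOTATED_BRANCHES statistics:

#include <linux/jump_label.h>

/* Hypothetical key; starts disabled, so the branch below is patched out. */
static DEFINE_STATIC_KEY_FALSE(my_feature_key);

void my_fast_path(void)
{
	if (static_branch_unlikely(&my_feature_key)) {
		/* slow path, only reached after static_branch_enable(&my_feature_key) */
	}
	/* common path continues here */
}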
[tip: core/static_call] tracepoint: Fix out of sync data passing by static caller
The following commit has been merged into the core/static_call branch of tip:

Commit-ID:     547305a64632813286700cb6d768bfe773df7d19
Gitweb:        https://git.kernel.org/tip/547305a64632813286700cb6d768bfe773df7d19
Author:        Steven Rostedt (VMware)
AuthorDate:    Thu, 01 Oct 2020 21:27:57 -04:00
Committer:     Peter Zijlstra
CommitterDate: Fri, 02 Oct 2020 21:18:25 +02:00

tracepoint: Fix out of sync data passing by static caller

Naresh reported a bug that appears to be a side effect of the static calls. It happens when going from more than one tracepoint callback to a single one, and removing the first callback on the list. Each entry in the list of tracepoint callbacks holds a function to call with the parameters of that tracepoint and a pointer to the data associated with that callback.

old_list:
  0: func = foo; data = NULL;
  1: func = bar; data = &bar_struct;

new_list:
  0: func = bar; data = &bar_struct;

CPU 0                                      CPU 1
-----                                      -----
tp_funcs = old_list;
tp_static_caller = tp_iterator
                                           __DO_TRACE()
                                              data = tp_funcs[0].data = NULL;
tp_funcs = new_list;
tracepoint_update_call()
   tp_static_caller = tp_funcs[0] = bar;
                                           tp_static_caller(data)
                                              bar(data)
                                                 x = data->item = NULL->item
                                           BOOM!

To solve this, add a tracepoint_synchronize_unregister() (which does both a synchronize_rcu() and a synchronize_srcu()) between changing tp_funcs and updating the static call. This ensures that when the static call is updated to point at the single callback, that callback receives the data it registered with.

Fixes: d25e37d89dd2f ("tracepoint: Optimize using static_call()")
Reported-by: Naresh Kamboju
Signed-off-by: Steven Rostedt (VMware)
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/linux-next/CA+G9fYvPXVRO0NV7yL=FxCmFEMYkCwdz7R=9w+_votpt824...@mail.gmail.com
---
 kernel/tracepoint.c | 22 --
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index e92f3fb..26efd22 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -221,7 +221,7 @@ static void *func_remove(struct tracepoint_func **funcs, return old; } -static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs) +static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs, bool sync) { void *func = tp->iterator; @@ -229,8 +229,17 @@ static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func if (!tp->static_call_key) return; - if (!tp_funcs[1].func) + if (!tp_funcs[1].func) { func = tp_funcs[0].func; + /* +* If going from the iterator back to a single caller, +* we need to synchronize with __DO_TRACE to make sure +* that the data passed to the callback is the one that +* belongs to that callback. +*/ + if (sync) + tracepoint_synchronize_unregister(); + } __static_call_update(tp->static_call_key, tp->static_call_tramp, func); } @@ -265,7 +274,7 @@ static int tracepoint_add_func(struct tracepoint *tp, * include/linux/tracepoint.h using rcu_dereference_sched(). */ rcu_assign_pointer(tp->funcs, tp_funcs); - tracepoint_update_call(tp, tp_funcs); + tracepoint_update_call(tp, tp_funcs, false); static_key_enable(&tp->key); release_probes(old); @@ -297,11 +306,12 @@ static int tracepoint_remove_func(struct tracepoint *tp, tp->unregfunc(); static_key_disable(&tp->key); + rcu_assign_pointer(tp->funcs, tp_funcs); } else { - tracepoint_update_call(tp, tp_funcs); + rcu_assign_pointer(tp->funcs, tp_funcs); + tracepoint_update_call(tp, tp_funcs, + tp_funcs[0].func != old[0].func); } - - rcu_assign_pointer(tp->funcs, tp_funcs); release_probes(old); return 0; }
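
As an illustration only (not taken from the patch), this is roughly how the problematic transition is reached from the caller's side. The tracepoint name my_tp and the probes foo/bar are hypothetical; DEFINE_TRACE() is what emits the __tracepoint_my_tp symbol used below, and error handling is omitted:

#include <linux/tracepoint.h>

static struct { int item; } bar_struct;

static void foo(void *data, int arg) { /* registered with NULL data */ }
static void bar(void *data, int arg) { /* expects data == &bar_struct */ }

static void example(void)
{
	tracepoint_probe_register(&__tracepoint_my_tp, (void *)foo, NULL);
	tracepoint_probe_register(&__tracepoint_my_tp, (void *)bar, &bar_struct);

	/*
	 * Going from two callbacks back to one: with the fix, the new
	 * tp_funcs list is published and a synchronize step runs before
	 * the static call is pointed straight at bar(), so no in-flight
	 * __DO_TRACE() can hand bar() the NULL data it read for foo().
	 */
	tracepoint_probe_unregister(&__tracepoint_my_tp, (void *)foo, NULL);
}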
[tip: core/static_call] tracepoint: Optimize using static_call()
The following commit has been merged into the core/static_call branch of tip: Commit-ID: d25e37d89dd2f41d7acae0429039d2f0ae8b4a07 Gitweb: https://git.kernel.org/tip/d25e37d89dd2f41d7acae0429039d2f0ae8b4a07 Author:Steven Rostedt (VMware) AuthorDate:Tue, 18 Aug 2020 15:57:52 +02:00 Committer: Ingo Molnar CommitterDate: Tue, 01 Sep 2020 09:58:06 +02:00 tracepoint: Optimize using static_call() Currently the tracepoint site will iterate a vector and issue indirect calls to however many handlers are registered (ie. the vector is long). Using static_call() it is possible to optimize this for the common case of only having a single handler registered. In this case the static_call() can directly call this handler. Otherwise, if the vector is longer than 1, call a function that iterates the whole vector like the current code. [peterz: updated to new interface] Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135805.279421...@infradead.org --- include/linux/tracepoint-defs.h | 5 ++- include/linux/tracepoint.h | 86 ++-- include/trace/define_trace.h| 14 ++--- kernel/tracepoint.c | 25 +++-- 4 files changed, 94 insertions(+), 36 deletions(-) diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index b29950a..de97450 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -11,6 +11,8 @@ #include #include +struct static_call_key; + struct trace_print_flags { unsigned long mask; const char *name; @@ -30,6 +32,9 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ struct static_key key; + struct static_call_key *static_call_key; + void *static_call_tramp; + void *iterator; int (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 598fec9..3722a10 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -19,6 +19,7 @@ #include #include #include +#include struct module; struct tracepoint; @@ -92,7 +93,9 @@ extern int syscall_regfunc(void); extern void syscall_unregfunc(void); #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ +#ifndef PARAMS #define PARAMS(args...) args +#endif #define TRACE_DEFINE_ENUM(x) #define TRACE_DEFINE_SIZEOF(x) @@ -148,6 +151,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #ifdef TRACEPOINTS_ENABLED +#ifdef CONFIG_HAVE_STATIC_CALL +#define __DO_TRACE_CALL(name) static_call(tp_func_##name) +#else +#define __DO_TRACE_CALL(name) __tracepoint_iter_##name +#endif /* CONFIG_HAVE_STATIC_CALL */ + /* * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. @@ -157,12 +166,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * has a "void" prototype, then it is invalid to declare a function * as "(void *, void)". 
*/ -#define __DO_TRACE(tp, proto, args, cond, rcuidle) \ +#define __DO_TRACE(name, proto, args, cond, rcuidle) \ do {\ struct tracepoint_func *it_func_ptr;\ - void *it_func; \ - void *__data; \ int __maybe_unused __idx = 0; \ + void *__data; \ \ if (!(cond))\ return; \ @@ -182,14 +190,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) rcu_irq_enter_irqson(); \ } \ \ - it_func_ptr = rcu_dereference_raw((tp)->funcs); \ - \ + it_func_ptr = \ + rcu_dereference_raw((&__tracepoint_##name)->funcs); \ if (it_func_ptr) { \ - do {\ - it_func = (it_func_ptr)->func; \ - __data = (it_func_ptr)->data; \ -
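
A minimal sketch of the static_call() pattern the tracepoint fast path adopts with this commit; my_tp_func, my_iterator and my_probe are hypothetical stand-ins for the per-tracepoint key (tp_func_##name in the patch), the slow-path iterator and a single registered callback:

#include <linux/static_call.h>
#include <linux/types.h>

static void my_iterator(void *data, int arg) { /* walk the whole callback vector */ }
static void my_probe(void *data, int arg)    { /* the single registered callback */ }

/* One patchable call site, defaulting to the slow-path iterator. */
DEFINE_STATIC_CALL(my_tp_func, my_iterator);

void my_trace_site(void *data, int arg)
{
	/* compiles down to a direct call, avoiding the indirect-call cost */
	static_call(my_tp_func)(data, arg);
}

void my_update(bool single_callback)
{
	/* like tracepoint_update_call(): retarget the call site at runtime */
	static_call_update(my_tp_func, single_callback ? my_probe : my_iterator);
}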
[tip: sched/core] sched: Remove struct sched_class::next field
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     a87e749e8fa1aaef9b4db32e21c2795e69ce67bf
Gitweb:        https://git.kernel.org/tip/a87e749e8fa1aaef9b4db32e21c2795e69ce67bf
Author:        Steven Rostedt (VMware)
AuthorDate:    Thu, 19 Dec 2019 16:44:54 -05:00
Committer:     Peter Zijlstra
CommitterDate: Thu, 25 Jun 2020 13:45:44 +02:00

sched: Remove struct sched_class::next field

Now that the sched_class descriptors are defined in order via the linker script vmlinux.lds.h, there's no reason to have a "next" pointer to the previous priority structure. Since the structures are laid out in order, they can be treated as an array and indexed to find the next sched_class descriptor.

Signed-off-by: Steven Rostedt (VMware)
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/20191219214558.845353...@goodmis.org
---
 kernel/sched/deadline.c  | 1 -
 kernel/sched/fair.c      | 1 -
 kernel/sched/idle.c      | 1 -
 kernel/sched/rt.c        | 1 -
 kernel/sched/sched.h     | 1 -
 kernel/sched/stop_task.c | 1 -
 6 files changed, 6 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index d9e7946..c9cc1d6 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2481,7 +2481,6 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p, const struct sched_class dl_sched_class __attribute__((section("__dl_sched_class"))) = { - .next = &rt_sched_class, .enqueue_task = enqueue_task_dl, .dequeue_task = dequeue_task_dl, .yield_task = yield_task_dl, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3365f6b..a63f400 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -11124,7 +11124,6 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task) */ const struct sched_class fair_sched_class __attribute__((section("__fair_sched_class"))) = { - .next = &idle_sched_class, .enqueue_task = enqueue_task_fair, .dequeue_task = dequeue_task_fair, .yield_task = yield_task_fair, diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index f580629..336d478 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -455,7 +455,6 @@ static void update_curr_idle(struct rq *rq) */ const struct sched_class idle_sched_class __attribute__((section("__idle_sched_class"))) = { - /* .next is NULL */ /* no enqueue/yield_task for idle tasks */ /* dequeue is not valid, we print a debug message there: */ diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 6543d44..f215eea 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2431,7 +2431,6 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) const struct sched_class rt_sched_class __attribute__((section("__rt_sched_class"))) = { - .next = &fair_sched_class, .enqueue_task = enqueue_task_rt, .dequeue_task = dequeue_task_rt, .yield_task = yield_task_rt, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4165c06..549e7e6 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1754,7 +1754,6 @@ extern const u32 sched_prio_to_wmult[40]; #define RETRY_TASK ((void *)-1UL) struct sched_class { - const struct sched_class *next; #ifdef CONFIG_UCLAMP_TASK int uclamp_enabled; diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index f4bbd54..394bc81 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -111,7 +111,6 @@ static void update_curr_stop(struct rq *rq) */ const struct sched_class stop_sched_class __attribute__((section("__stop_sched_class"))) = { - .next = &dl_sched_class, .enqueue_task = enqueue_task_stop, .dequeue_task = dequeue_task_stop,
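
A small userspace analogue (not kernel code; names are made up) of the idea: once the descriptors are laid out contiguously in priority order, the "next" class is plain pointer arithmetic rather than a linked-list pointer:

#include <stdio.h>

struct class_desc { const char *name; };

/* Contiguous and ordered lowest to highest, like the linker-script sections. */
static const struct class_desc classes[] = {
	{ "idle" }, { "fair" }, { "rt" }, { "dl" }, { "stop" },
};

#define NR_CLASSES (sizeof(classes) / sizeof(classes[0]))

int main(void)
{
	const struct class_desc *c = &classes[NR_CLASSES - 1];	/* "highest" */

	/* Walk from highest to lowest priority by decrementing the pointer,
	 * the same way for_each_class() now walks the sched_class array
	 * (the kernel uses a begin-1 sentinel instead of an explicit break). */
	for (;;) {
		printf("%s\n", c->name);
		if (c == classes)
			break;
		c--;
	}
	return 0;
}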
[tip: sched/core] sched: Have sched_class_highest define by vmlinux.lds.h
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     c3a340f7e7eadac7662ab104ceb16432e5a4c6b2
Gitweb:        https://git.kernel.org/tip/c3a340f7e7eadac7662ab104ceb16432e5a4c6b2
Author:        Steven Rostedt (VMware)
AuthorDate:    Thu, 19 Dec 2019 16:44:53 -05:00
Committer:     Peter Zijlstra
CommitterDate: Thu, 25 Jun 2020 13:45:44 +02:00

sched: Have sched_class_highest define by vmlinux.lds.h

Now that the sched_class descriptors are defined by the linker script, the code needs to be aware of whether stop_sched_class exists (it is only defined when SMP is enabled), as it is used as the "highest" priority class when present. Move the declaration of sched_class_highest to the same location in the linker script that inserts stop_sched_class; this also makes it easier to see which class should be the highest, as that linker script location defines the priorities as well.

Signed-off-by: Steven Rostedt (VMware)
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/20191219214558.682913...@goodmis.org
---
 include/asm-generic/vmlinux.lds.h |  5 -
 kernel/sched/core.c               |  8
 kernel/sched/sched.h              | 17 +
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 2186d7b..66fb84c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -114,11 +114,14 @@ * relation to each other. */ #define SCHED_DATA \ + STRUCT_ALIGN(); \ + __begin_sched_classes = .; \ *(__idle_sched_class) \ *(__fair_sched_class) \ *(__rt_sched_class) \ *(__dl_sched_class) \ - *(__stop_sched_class) + *(__stop_sched_class) \ + __end_sched_classes = .; /* * Align to a 32 byte boundary equal to the diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0208b71..81640fe 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6646,6 +6646,14 @@ void __init sched_init(void) unsigned long ptr = 0; int i; + /* Make sure the linker didn't screw up */ + BUG_ON(&idle_sched_class + 1 != &fair_sched_class || + &fair_sched_class + 1 != &rt_sched_class || + &rt_sched_class + 1 != &dl_sched_class); +#ifdef CONFIG_SMP + BUG_ON(&dl_sched_class + 1 != &stop_sched_class); +#endif + wait_bit_init(); #ifdef CONFIG_FAIR_GROUP_SCHED diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3368876..4165c06 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1811,7 +1811,7 @@ struct sched_class { #ifdef CONFIG_FAIR_GROUP_SCHED void (*task_change_group)(struct task_struct *p, int type); #endif -}; +} __aligned(32); /* STRUCT_ALIGN(), vmlinux.lds.h */ static inline void put_prev_task(struct rq *rq, struct task_struct *prev) { @@ -1825,17 +1825,18 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next) next->sched_class->set_next_task(rq, next, false); } -#ifdef CONFIG_SMP -#define sched_class_highest (&stop_sched_class) -#else -#define sched_class_highest (&dl_sched_class) -#endif +/* Defined in include/asm-generic/vmlinux.lds.h */ +extern struct sched_class __begin_sched_classes[]; +extern struct sched_class __end_sched_classes[]; + +#define sched_class_highest (__end_sched_classes - 1) +#define sched_class_lowest (__begin_sched_classes - 1) #define for_class_range(class, _from, _to) \ - for (class = (_from); class != (_to); class = class->next) + for (class = (_from); class != (_to); class--) #define for_each_class(class) \ - for_class_range(class, sched_class_highest, sched_class_lowest) extern const struct sched_class stop_sched_class; extern const struct sched_class dl_sched_class;
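
For reference, a simplified sketch (not the exact kernel code; the fair-class fast path and error handling are omitted) of how these linker-provided bounds end up being consumed by the scheduler core: the pick loop walks the descriptors from sched_class_highest downwards by address instead of following ->next:

/* Sketch only, modelled on the pick loop in kernel/sched/core.c. */
static struct task_struct *pick_task_sketch(struct rq *rq)
{
	const struct sched_class *class;
	struct task_struct *p;

	for_each_class(class) {		/* sched_class_highest .. lowest */
		p = class->pick_next_task(rq);
		if (p)
			return p;
	}

	BUG();	/* the idle class should always have a runnable task */
}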
[tip: sched/core] sched: Force the address order of each sched class descriptor
The following commit has been merged into the sched/core branch of tip: Commit-ID: 590d69796346353878b275c5512c664e3f875f24 Gitweb: https://git.kernel.org/tip/590d69796346353878b275c5512c664e3f875f24 Author:Steven Rostedt (VMware) AuthorDate:Thu, 19 Dec 2019 16:44:52 -05:00 Committer: Peter Zijlstra CommitterDate: Thu, 25 Jun 2020 13:45:43 +02:00 sched: Force the address order of each sched class descriptor In order to make a micro optimization in pick_next_task(), the order of the sched class descriptor address must be in the same order as their priority to each other. That is: &idle_sched_class < &fair_sched_class < &rt_sched_class < &dl_sched_class < &stop_sched_class In order to guarantee this order of the sched class descriptors, add each one into their own data section and force the order in the linker script. Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/157675913272.349305.8936736338884044103.stgit@localhost.localdomain --- include/asm-generic/vmlinux.lds.h | 13 + kernel/sched/deadline.c | 3 ++- kernel/sched/fair.c | 3 ++- kernel/sched/idle.c | 3 ++- kernel/sched/rt.c | 3 ++- kernel/sched/stop_task.c | 3 ++- 6 files changed, 23 insertions(+), 5 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index db600ef..2186d7b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -109,6 +109,18 @@ #endif /* + * The order of the sched class addresses are important, as they are + * used to determine the order of the priority of each sched class in + * relation to each other. + */ +#define SCHED_DATA \ + *(__idle_sched_class) \ + *(__fair_sched_class) \ + *(__rt_sched_class) \ + *(__dl_sched_class) \ + *(__stop_sched_class) + +/* * Align to a 32 byte boundary equal to the * alignment gcc 4.5 uses for a struct */ @@ -388,6 +400,7 @@ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ __start_rodata = .; \ *(.rodata) *(.rodata.*) \ + SCHED_DATA \ RO_AFTER_INIT_DATA /* Read only after init */ \ . 
= ALIGN(8); \ __start___tracepoints_ptrs = .; \ diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index d4708e2..d9e7946 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2479,7 +2479,8 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p, } } -const struct sched_class dl_sched_class = { +const struct sched_class dl_sched_class + __attribute__((section("__dl_sched_class"))) = { .next = &rt_sched_class, .enqueue_task = enqueue_task_dl, .dequeue_task = dequeue_task_dl, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0424a0a..3365f6b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -11122,7 +11122,8 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task /* * All the scheduling class methods: */ -const struct sched_class fair_sched_class = { +const struct sched_class fair_sched_class + __attribute__((section("__fair_sched_class"))) = { .next = &idle_sched_class, .enqueue_task = enqueue_task_fair, .dequeue_task = dequeue_task_fair, diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 8d75ca2..f580629 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -453,7 +453,8 @@ static void update_curr_idle(struct rq *rq) /* * Simple, special scheduling class for the per-CPU idle tasks: */ -const struct sched_class idle_sched_class = { +const struct sched_class idle_sched_class + __attribute__((section("__idle_sched_class"))) = { /* .next is NULL */ /* no enqueue/yield_task for idle tasks */ diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f395ddb..6543d44 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2429,7 +2429,8 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) return 0; } -const struct sched_class rt_sched_class = { +const struct sched_class rt_sched_class + __attribute__((section("__rt_sched_class"))) = { .next = &fair_sched_class, .enqueue_task = enqueue_task_rt, .dequeue_task = dequeue_task_rt, diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 3e50a6a..f4bbd54 1
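
The underlying section-placement trick can be tried in plain userspace C on an ELF system, which may make the mechanism easier to see: objects dropped into a named section end up contiguous, and the linker provides __start_/__stop_ symbols for its bounds. Everything here lives in one section (so ordering within it simply follows emission/link order); the kernel patch goes a step further and orders several distinct sections explicitly in vmlinux.lds.h, which is what guarantees the priority order of the descriptors:

#include <stdio.h>

struct class_desc { const char *name; };

/* Each descriptor is dropped into the same named ELF section. */
#define IN_SECTION __attribute__((used, section("my_classes")))

static const struct class_desc idle_class IN_SECTION = { "idle" };
static const struct class_desc fair_class IN_SECTION = { "fair" };
static const struct class_desc rt_class   IN_SECTION = { "rt"   };

/* Provided by the linker for any section whose name is a valid C identifier. */
extern const struct class_desc __start_my_classes[];
extern const struct class_desc __stop_my_classes[];

int main(void)
{
	const struct class_desc *c;

	for (c = __start_my_classes; c < __stop_my_classes; c++)
		printf("%s\n", c->name);
	return 0;
}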
[tip: perf/core] perf tools: Remove unused trace_find_next_event()
The following commit has been merged into the perf/core branch of tip: Commit-ID: 9bdff5b6436655d42dd30253c521e86ce07b9961 Gitweb: https://git.kernel.org/tip/9bdff5b6436655d42dd30253c521e86ce07b9961 Author:Steven Rostedt (VMware) AuthorDate:Thu, 17 Oct 2019 17:05:23 -04:00 Committer: Arnaldo Carvalho de Melo CommitterDate: Fri, 18 Oct 2019 12:07:46 -03:00 perf tools: Remove unused trace_find_next_event() trace_find_next_event() was buggy and pretty much a useless helper. As there are no more users, just remove it. Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tzvetomir Stoyanov Cc: linux-trace-de...@vger.kernel.org Link: http://lore.kernel.org/lkml/20191017210636.224045...@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/trace-event-parse.c | 31 + tools/perf/util/trace-event.h | 2 +-- 2 files changed, 33 deletions(-) diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 5d6bfc7..9634f0a 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -173,37 +173,6 @@ int parse_event_file(struct tep_handle *pevent, return tep_parse_event(pevent, buf, size, sys); } -struct tep_event *trace_find_next_event(struct tep_handle *pevent, - struct tep_event *event) -{ - static int idx; - int events_count; - struct tep_event *all_events; - - all_events = tep_get_first_event(pevent); - events_count = tep_get_events_count(pevent); - if (!pevent || !all_events || events_count < 1) - return NULL; - - if (!event) { - idx = 0; - return all_events; - } - - if (idx < events_count && event == (all_events + idx)) { - idx++; - if (idx == events_count) - return NULL; - return (all_events + idx); - } - - for (idx = 1; idx < events_count; idx++) { - if (event == (all_events + (idx - 1))) - return (all_events + idx); - } - return NULL; -} - struct flag { const char *name; unsigned long long value; diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 2e15838..72fdf2a 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -47,8 +47,6 @@ void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int siz ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe); -struct tep_event *trace_find_next_event(struct tep_handle *pevent, - struct tep_event *event); unsigned long long read_size(struct tep_event *event, void *ptr, int size); unsigned long long eval_flag(const char *flag);
[tip: perf/core] perf scripting engines: Iterate on tep event arrays directly
The following commit has been merged into the perf/core branch of tip: Commit-ID: a5e05abc6b8d81148b35cd8632a4a6252383d968 Gitweb: https://git.kernel.org/tip/a5e05abc6b8d81148b35cd8632a4a6252383d968 Author:Steven Rostedt (VMware) AuthorDate:Thu, 17 Oct 2019 17:05:22 -04:00 Committer: Arnaldo Carvalho de Melo CommitterDate: Fri, 18 Oct 2019 12:07:46 -03:00 perf scripting engines: Iterate on tep event arrays directly Instead of calling a useless (and broken) helper function to get the next event of a tep event array, just get the array directly and iterate over it. Note, the broken part was from trace_find_next_event() which after this will no longer be used, and can be removed. Committer notes: This fixes a segfault when generating python scripts from perf.data files with multiple tracepoint events, i.e. the following use case is fixed by this patch: # perf record -e sched:* sleep 1 [ perf record: Woken up 31 times to write data ] [ perf record: Captured and wrote 0.031 MB perf.data (9 samples) ] # perf script -g python Segmentation fault (core dumped) # Reported-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (VMware) Tested-by: Arnaldo Carvalho de Melo Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tzvetomir Stoyanov Cc: linux-trace-de...@vger.kernel.org Link: http://lkml.kernel.org/r/20191017153733.630cd...@gandalf.local.home Link: http://lore.kernel.org/lkml/20191017210636.061448...@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-perl.c | 8 ++-- tools/perf/util/scripting-engines/trace-event-python.c | 9 +++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 1596185..741f040 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -539,10 +539,11 @@ static int perl_stop_script(void) static int perl_generate_script(struct tep_handle *pevent, const char *outfile) { + int i, not_first, count, nr_events; + struct tep_event **all_events; struct tep_event *event = NULL; struct tep_format_field *f; char fname[PATH_MAX]; - int not_first, count; FILE *ofp; sprintf(fname, "%s.pl", outfile); @@ -603,8 +604,11 @@ sub print_backtrace\n\ }\n\n\ "); + nr_events = tep_get_events_count(pevent); + all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID); - while ((event = trace_find_next_event(pevent, event))) { + for (i = 0; all_events && i < nr_events; i++) { + event = all_events[i]; fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name); fprintf(ofp, "\tmy ("); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 5d341ef..93c03b3 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1687,10 +1687,11 @@ static int python_stop_script(void) static int python_generate_script(struct tep_handle *pevent, const char *outfile) { + int i, not_first, count, nr_events; + struct tep_event **all_events; struct tep_event *event = NULL; struct tep_format_field *f; char fname[PATH_MAX]; - int not_first, count; FILE *ofp; sprintf(fname, "%s.py", outfile); @@ -1735,7 +1736,11 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile fprintf(ofp, "def trace_end():\n"); fprintf(ofp, "\tprint(\"in trace_end\")\n\n"); - while ((event = trace_find_next_event(pevent, event))) 
{ + nr_events = tep_get_events_count(pevent); + all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID); + + for (i = 0; all_events && i < nr_events; i++) { + event = all_events[i]; fprintf(ofp, "def %s__%s(", event->system, event->name); fprintf(ofp, "event_name, "); fprintf(ofp, "context, ");
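
For anyone adapting similar code, the replacement pattern in isolation looks like this (userspace, against libtraceevent; the include path may differ depending on how the library is installed, and "pevent" is assumed to be an already-initialized handle):

#include <stdio.h>
#include <traceevent/event-parse.h>	/* tools/lib/traceevent/event-parse.h in-tree */

static void list_events(struct tep_handle *pevent)
{
	struct tep_event **all_events;
	int i, nr_events;

	nr_events = tep_get_events_count(pevent);
	all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID);

	/* tep_list_events() returns a sorted array owned by the library; no
	 * per-call static index is kept, which is what broke the old helper. */
	for (i = 0; all_events && i < nr_events; i++)
		printf("%s:%s\n", all_events[i]->system, all_events[i]->name);
}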
[tip: perf/core] tools lib traceevent: Remove unneeded qsort and uses memmove instead
The following commit has been merged into the perf/core branch of tip: Commit-ID: 301011ba622513cb41ced59973972204e0da2f71 Gitweb: https://git.kernel.org/tip/301011ba622513cb41ced59973972204e0da2f71 Author:Steven Rostedt (VMware) AuthorDate:Wed, 28 Aug 2019 15:05:29 -04:00 Committer: Arnaldo Carvalho de Melo CommitterDate: Thu, 29 Aug 2019 08:36:12 -03:00 tools lib traceevent: Remove unneeded qsort and uses memmove instead While reading a trace data file that had 100,000s of tasks, the process took an extremely long time. I profiled it down to add_new_comm(), which was doing a qsort() call on an array that was pretty much already sorted (all but the last element. qsort() isn't very efficient when dealing with mostly sorted arrays, and this definitely showed its issues. When adding a new task to the task list, instead of using qsort(), do another bsearch() with a function that will find the element before where the new task will be inserted in. Then simply shift the rest of the array, and insert the task where it belongs. Fixes: f7d82350e597d ("tools/events: Add files to create libtraceevent.a") Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: linux-trace-de...@vger.kernel.org Link: http://lkml.kernel.org/r/20190828191820.127233...@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 55 + 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 13fd9fd..3e83636 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -142,6 +142,25 @@ static int cmdline_cmp(const void *a, const void *b) return 0; } +/* Looking for where to place the key */ +static int cmdline_slot_cmp(const void *a, const void *b) +{ + const struct tep_cmdline *ca = a; + const struct tep_cmdline *cb = b; + const struct tep_cmdline *cb1 = cb + 1; + + if (ca->pid < cb->pid) + return -1; + + if (ca->pid > cb->pid) { + if (ca->pid <= cb1->pid) + return 0; + return 1; + } + + return 0; +} + struct cmdline_list { struct cmdline_list *next; char*comm; @@ -239,6 +258,7 @@ static int add_new_comm(struct tep_handle *tep, struct tep_cmdline *cmdline; struct tep_cmdline key; char *new_comm; + int cnt; if (!pid) return 0; @@ -271,18 +291,41 @@ static int add_new_comm(struct tep_handle *tep, } tep->cmdlines = cmdlines; - cmdlines[tep->cmdline_count].comm = strdup(comm); - if (!cmdlines[tep->cmdline_count].comm) { + key.comm = strdup(comm); + if (!key.comm) { errno = ENOMEM; return -1; } - cmdlines[tep->cmdline_count].pid = pid; - - if (cmdlines[tep->cmdline_count].comm) + if (!tep->cmdline_count) { + /* no entries yet */ + tep->cmdlines[0] = key; tep->cmdline_count++; + return 0; + } - qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp); + /* Now find where we want to store the new cmdline */ + cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count - 1, + sizeof(*tep->cmdlines), cmdline_slot_cmp); + + cnt = tep->cmdline_count; + if (cmdline) { + /* cmdline points to the one before the spot we want */ + cmdline++; + cnt -= cmdline - tep->cmdlines; + + } else { + /* The new entry is either before or after the list */ + if (key.pid > tep->cmdlines[tep->cmdline_count - 1].pid) { + tep->cmdlines[tep->cmdline_count++] = key; + return 0; + } + cmdline = &tep->cmdlines[0]; + } + memmove(cmdline + 1, cmdline, (cnt * sizeof(*cmdline))); + *cmdline = key; + + tep->cmdline_count++; return 0; }
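
The same technique in a self-contained userspace form (plain int keys instead of struct tep_cmdline; the caller is assumed to have ensured array capacity): bsearch() with a "slot" comparator finds the element just before the insertion point, and a single memmove() opens the gap, instead of appending and re-running qsort() on a nearly sorted array:

#include <stdlib.h>
#include <string.h>

/* Return 0 when the key belongs right after elem[0]; elem[1] is always valid
 * because the search covers only the first count - 1 elements. */
static int slot_cmp(const void *key, const void *elem)
{
	int k = *(const int *)key;
	const int *e = elem;

	if (k < e[0])
		return -1;
	if (k > e[1])
		return 1;
	return 0;
}

static void insert_sorted(int *arr, int *count, int key)
{
	int n = *count;
	int *slot;

	if (n == 0 || key >= arr[n - 1]) {	/* empty, or new largest: append */
		arr[n] = key;
		*count = n + 1;
		return;
	}

	/* Search the first n - 1 entries; a hit means "insert after *slot". */
	slot = bsearch(&key, arr, n - 1, sizeof(*arr), slot_cmp);
	slot = slot ? slot + 1 : arr;		/* NULL: smaller than everything */

	memmove(slot + 1, slot, (arr + n - slot) * sizeof(*arr));
	*slot = key;
	*count = n + 1;
}

int main(void)
{
	int arr[16] = { 2, 4, 6 };
	int count = 3;

	insert_sorted(arr, &count, 5);	/* arr is now 2 4 5 6 */
	return 0;
}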
[tip: perf/core] tools lib traceevent: Do not free tep->cmdlines in add_new_comm() on failure
The following commit has been merged into the perf/core branch of tip: Commit-ID: b0215e2d6a18d8331b2d4a8b38ccf3eff783edb1 Gitweb: https://git.kernel.org/tip/b0215e2d6a18d8331b2d4a8b38ccf3eff783edb1 Author:Steven Rostedt (VMware) AuthorDate:Wed, 28 Aug 2019 15:05:28 -04:00 Committer: Arnaldo Carvalho de Melo CommitterDate: Thu, 29 Aug 2019 08:36:12 -03:00 tools lib traceevent: Do not free tep->cmdlines in add_new_comm() on failure If the re-allocation of tep->cmdlines succeeds, then the previous allocation of tep->cmdlines will be freed. If we later fail in add_new_comm(), we must not free cmdlines, and also should assign tep->cmdlines to the new allocation. Otherwise when freeing tep, the tep->cmdlines will be pointing to garbage. Fixes: a6d2a61ac653a ("tools lib traceevent: Remove some die() calls") Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: linux-trace-de...@vger.kernel.org Cc: sta...@vger.kernel.org Link: http://lkml.kernel.org/r/20190828191819.970121...@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index b36b536..13fd9fd 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -269,10 +269,10 @@ static int add_new_comm(struct tep_handle *tep, errno = ENOMEM; return -1; } + tep->cmdlines = cmdlines; cmdlines[tep->cmdline_count].comm = strdup(comm); if (!cmdlines[tep->cmdline_count].comm) { - free(cmdlines); errno = ENOMEM; return -1; } @@ -283,7 +283,6 @@ static int add_new_comm(struct tep_handle *tep, tep->cmdline_count++; qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp); - tep->cmdlines = cmdlines; return 0; }
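
The underlying rule is general, not specific to libtraceevent: once realloc() succeeds, the old block is gone, so the owning pointer must be updated immediately, and a later, unrelated failure must not free() the block the owner now references. A minimal sketch, with a hypothetical struct handle standing in for struct tep_handle:

#include <stdlib.h>
#include <string.h>

struct handle { char **items; int count; };

static int add_item(struct handle *h, const char *name)
{
	char **items = realloc(h->items, (h->count + 1) * sizeof(*items));

	if (!items)
		return -1;	/* realloc failed: old h->items is still valid */
	h->items = items;	/* publish the new block right away, as the fix does */

	items[h->count] = strdup(name);
	if (!items[h->count])
		return -1;	/* do NOT free(items): h->items owns it now */

	h->count++;
	return 0;
}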