On Mon,  4 Jul 2016 15:46:04 -0400
Julien Desfossez <jdesfos...@efficios.com> wrote:


> diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
> index 9b90c57..fcb0f29 100644
> --- a/include/trace/events/sched.h
> +++ b/include/trace/events/sched.h
> @@ -8,6 +8,34 @@
>  #include <linux/tracepoint.h>
>  #include <linux/binfmts.h>
>  
> +#define SCHEDULING_POLICY                            \
> +     EM( SCHED_NORMAL,       "SCHED_NORMAL")         \
> +     EM( SCHED_FIFO,         "SCHED_FIFO")           \
> +     EM( SCHED_RR,           "SCHED_RR")             \
> +     EM( SCHED_BATCH,        "SCHED_BATCH")          \
> +     EM( SCHED_IDLE,         "SCHED_IDLE")           \
> +     EMe(SCHED_DEADLINE,     "SCHED_DEADLINE")
> +
> +/*
> + * First define the enums in the above macros to be exported to userspace
> + * via TRACE_DEFINE_ENUM().
> + */
> +#undef EM
> +#undef EMe
> +#define EM(a, b)     TRACE_DEFINE_ENUM(a);
> +#define EMe(a, b)    TRACE_DEFINE_ENUM(a);
> +
> +SCHEDULING_POLICY
> +
> +/*
> + * Now redefine the EM() and EMe() macros to map the enums to the strings
> + * that will be printed in the output.
> + */
> +#undef EM
> +#undef EMe
> +#define EM(a, b)     {a, b},
> +#define EMe(a, b)    {a, b}
> +
>  /*
>   * Tracepoint for calling kthread_stop, performed to end a kthread:
>   */
> @@ -562,6 +590,46 @@ TRACE_EVENT(sched_wake_idle_without_ipi,
>  
>       TP_printk("cpu=%d", __entry->cpu)
>  );
> +
> +/*
> + * Tracepoint for showing scheduling priority changes.
> + */
> +TRACE_EVENT(sched_prio_update,

I'm fine with the addition of this tracepoint. You'll have to get it past
Peter Zijlstra, though.

> +
> +     TP_PROTO(struct task_struct *tsk),
> +
> +     TP_ARGS(tsk),
> +
> +     TP_STRUCT__entry(
> +             __array( char,  comm,   TASK_COMM_LEN   )

I could imagine this being a high-frequency tracepoint, especially with
a lot of boosting going on. Can we nuke the comm recording and let the
userspace tools just hook into the sched_switch tracepoint for that?

-- Steve


> +             __field( pid_t, pid                     )
> +             __field( unsigned int,  policy          )
> +             __field( int,   nice                    )
> +             __field( unsigned int,  rt_priority     )
> +             __field( u64,   dl_runtime              )
> +             __field( u64,   dl_deadline             )
> +             __field( u64,   dl_period               )
> +     ),
> +
> +     TP_fast_assign(
> +             memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
> +             __entry->pid            = tsk->pid;
> +             __entry->policy         = tsk->policy;
> +             __entry->nice           = task_nice(tsk);
> +             __entry->rt_priority    = tsk->rt_priority;
> +             __entry->dl_runtime     = tsk->dl.dl_runtime;
> +             __entry->dl_deadline    = tsk->dl.dl_deadline;
> +             __entry->dl_period      = tsk->dl.dl_period;
> +     ),
> +
> +     TP_printk("comm=%s pid=%d, policy=%s, nice=%d, rt_priority=%u, "
> +                     "dl_runtime=%Lu, dl_deadline=%Lu, dl_period=%Lu",
> +                     __entry->comm, __entry->pid,
> +                     __print_symbolic(__entry->policy, SCHEDULING_POLICY),
> +                     __entry->nice, __entry->rt_priority,
> +                     __entry->dl_runtime, __entry->dl_deadline,
> +                     __entry->dl_period)
> +);
>  #endif /* _TRACE_SCHED_H */
>  
>  /* This part must be outside protection */
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 7926993..ac4294a 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1773,6 +1773,7 @@ long _do_fork(unsigned long clone_flags,
>               struct pid *pid;
>  
>               trace_sched_process_fork(current, p);
> +             trace_sched_prio_update(p);
>  
>               pid = get_task_pid(p, PIDTYPE_PID);
>               nr = pid_vnr(pid);
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index ce83e39..c729425 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3708,6 +3708,7 @@ void set_user_nice(struct task_struct *p, long nice)
>                       resched_curr(rq);
>       }
>  out_unlock:
> +     trace_sched_prio_update(p);
>       task_rq_unlock(rq, p, &rf);
>  }
>  EXPORT_SYMBOL(set_user_nice);
> @@ -3912,6 +3913,8 @@ static void __setscheduler(struct rq *rq, struct 
> task_struct *p,
>               p->sched_class = &rt_sched_class;
>       else
>               p->sched_class = &fair_sched_class;
> +
> +     trace_sched_prio_update(p);
>  }
>  
>  static void

Reply via email to