On 12-12-29 11:42 AM, Frederic Weisbecker wrote:
> Allow to dynamically switch between tick and virtual based cputime accounting.
> This way we can provide a kind of "on-demand" virtual based cputime
> accounting. In this mode, the kernel will rely on the user hooks
> subsystem to dynamically hook on kernel boundaries.
> 
> This is in preparation for beeing able to stop the timer tick further
> idle. Doing so will depend on CONFIG_VIRT_CPU_ACCOUNTING which makes

s/beeing/being/  -- also I know what you mean, but it may not be
100% clear to everyone -- perhaps "...for being able to stop the
timer tick in more places than just the idle state." 

> it possible to account the cputime without the tick by hooking on
> kernel/user boundaries.
> 
> Depending whether the tick is stopped or not, we can switch between
> tick and vtime based accounting anytime in order to minimize the
> overhead associated to user hooks.
> 
> Signed-off-by: Frederic Weisbecker <fweis...@gmail.com>
> Cc: Alessio Igor Bogani <abog...@kernel.org>
> Cc: Andrew Morton <a...@linux-foundation.org>
> Cc: Chris Metcalf <cmetc...@tilera.com>
> Cc: Christoph Lameter <c...@linux.com>
> Cc: Geoff Levand <ge...@infradead.org>
> Cc: Gilad Ben Yossef <gi...@benyossef.com>
> Cc: Hakan Akkan <hakanak...@gmail.com>
> Cc: Ingo Molnar <mi...@kernel.org>
> Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com>
> Cc: Paul Gortmaker <paul.gortma...@windriver.com>
> Cc: Peter Zijlstra <pet...@infradead.org>
> Cc: Steven Rostedt <rost...@goodmis.org>
> Cc: Thomas Gleixner <t...@linutronix.de>
> ---
>  include/linux/kernel_stat.h |    2 +-
>  include/linux/sched.h       |    4 +-
>  include/linux/vtime.h       |    8 ++++++
>  init/Kconfig                |    6 ++++
>  kernel/fork.c               |    2 +-
>  kernel/sched/cputime.c      |   58 +++++++++++++++++++++++++++---------------
>  kernel/time/tick-sched.c    |    5 +++-
>  7 files changed, 59 insertions(+), 26 deletions(-)
> 
> diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
> index 66b7078..ed5f6ed 100644
> --- a/include/linux/kernel_stat.h
> +++ b/include/linux/kernel_stat.h
> @@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
>  extern void account_steal_time(cputime_t);
>  extern void account_idle_time(cputime_t);
>  
> -#ifdef CONFIG_VIRT_CPU_ACCOUNTING
> +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
>  static inline void account_process_tick(struct task_struct *tsk, int user)
>  {
>       vtime_account_user(tsk);
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 206bb08..66b2344 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -605,7 +605,7 @@ struct signal_struct {
>       cputime_t utime, stime, cutime, cstime;
>       cputime_t gtime;
>       cputime_t cgtime;
> -#ifndef CONFIG_VIRT_CPU_ACCOUNTING
> +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
>       struct cputime prev_cputime;
>  #endif
>       unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
> @@ -1365,7 +1365,7 @@ struct task_struct {
>  
>       cputime_t utime, stime, utimescaled, stimescaled;
>       cputime_t gtime;
> -#ifndef CONFIG_VIRT_CPU_ACCOUNTING
> +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
>       struct cputime prev_cputime;
>  #endif
>       unsigned long nvcsw, nivcsw; /* context switch counts */
> diff --git a/include/linux/vtime.h b/include/linux/vtime.h
> index 1151960..e57020d 100644
> --- a/include/linux/vtime.h
> +++ b/include/linux/vtime.h
> @@ -10,12 +10,20 @@ extern void vtime_account_system_irqsafe(struct task_struct *tsk);
>  extern void vtime_account_idle(struct task_struct *tsk);
>  extern void vtime_account_user(struct task_struct *tsk);
>  extern void vtime_account(struct task_struct *tsk);
> +
> +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
> +extern bool vtime_accounting(void);
>  #else
> +static inline bool vtime_accounting(void) { return true; }
> +#endif
> +
> +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
>  static inline void vtime_task_switch(struct task_struct *prev) { }
>  static inline void vtime_account_system(struct task_struct *tsk) { }
>  static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
>  static inline void vtime_account_user(struct task_struct *tsk) { }
>  static inline void vtime_account(struct task_struct *tsk) { }
> +static inline bool vtime_accounting(void) { return false; }

It wasn't 100% obvious what vtime_accounting() was doing until I
saw its definition below.  I wonder if it should be something like
vtime_accounting_on() or vtime_accounting_enabled() instead?
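
Something like this, perhaps (just a sketch, same logic as your patch,
only renamed so it reads as a yes/no question -- the
vtime_accounting_enabled() name is only my suggestion):

  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  /* Can be off at runtime; depends on context tracking being active */
  extern bool vtime_accounting_enabled(void);
  #else
  /* Native vtime accounting is unconditional when compiled in */
  static inline bool vtime_accounting_enabled(void) { return true; }
  #endif

Then the call sites like "if (vtime_accounting_enabled())" in
account_process_tick() and cputime_adjust() pretty much document
themselves.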

>  #endif
>  
>  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
> diff --git a/init/Kconfig b/init/Kconfig
> index dad2b88..307bc35 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -342,6 +342,7 @@ config VIRT_CPU_ACCOUNTING
>       bool "Deterministic task and CPU time accounting"
>       depends on HAVE_VIRT_CPU_ACCOUNTING || HAVE_CONTEXT_TRACKING
>       select VIRT_CPU_ACCOUNTING_GEN if !HAVE_VIRT_CPU_ACCOUNTING
> +     select VIRT_CPU_ACCOUNTING_NATIVE if HAVE_VIRT_CPU_ACCOUNTING
>       help
>         Select this option to enable more accurate task and CPU time
>         accounting.  This is done by reading a CPU counter on each
> @@ -366,11 +367,16 @@ endchoice
>  
>  config VIRT_CPU_ACCOUNTING_GEN
>       select CONTEXT_TRACKING
> +     depends on VIRT_CPU_ACCOUNTING && HAVE_CONTEXT_TRACKING

Should the 2nd half of this "depends" have been here already, i.e.
introduced with the prev. patch that created VIRT_CPU_ACCOUNTING_GEN?

Paul.
--

>       bool
>       help
>         Implement a generic virtual based cputime accounting by using
>         the context tracking subsystem.
>  
> +config VIRT_CPU_ACCOUNTING_NATIVE
> +     depends on VIRT_CPU_ACCOUNTING && HAVE_VIRT_CPU_ACCOUNTING
> +     bool
> +
>  config BSD_PROCESS_ACCT
>       bool "BSD Process Accounting"
>       help
> diff --git a/kernel/fork.c b/kernel/fork.c
> index a31b823..8e934d2 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1222,7 +1222,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>  
>       p->utime = p->stime = p->gtime = 0;
>       p->utimescaled = p->stimescaled = 0;
> -#ifndef CONFIG_VIRT_CPU_ACCOUNTING
> +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
>       p->prev_cputime.utime = p->prev_cputime.stime = 0;
>  #endif
>  #if defined(SPLIT_RSS_COUNTING)
> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> index 3749a0e..e1fcab4 100644
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -317,8 +317,6 @@ out:
>       rcu_read_unlock();
>  }
>  
> -#ifndef CONFIG_VIRT_CPU_ACCOUNTING
> -
>  #ifdef CONFIG_IRQ_TIME_ACCOUNTING
>  /*
>   * Account a tick to a process and cpustat
> @@ -388,6 +386,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
>                                               struct rq *rq) {}
>  #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
>  
> +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
>  /*
>   * Account a single tick of cpu time.
>   * @p: the process that the cpu time gets accounted to
> @@ -398,6 +397,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
>       cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
>       struct rq *rq = this_rq();
>  
> +     if (vtime_accounting()) {
> +             vtime_account_user(p);
> +             return;
> +     }
> +
>       if (sched_clock_irqtime) {
>               irqtime_account_process_tick(p, user_tick, rq);
>               return;
> @@ -439,29 +443,13 @@ void account_idle_ticks(unsigned long ticks)
>  
>       account_idle_time(jiffies_to_cputime(ticks));
>  }
> -
>  #endif
>  
> +
>  /*
>   * Use precise platform statistics if available:
>   */
>  #ifdef CONFIG_VIRT_CPU_ACCOUNTING
> -void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
> -{
> -     *ut = p->utime;
> -     *st = p->stime;
> -}
> -
> -void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
> -{
> -     struct task_cputime cputime;
> -
> -     thread_group_cputime(p, &cputime);
> -
> -     *ut = cputime.utime;
> -     *st = cputime.stime;
> -}
> -
>  void vtime_account_system_irqsafe(struct task_struct *tsk)
>  {
>       unsigned long flags;
> @@ -517,8 +505,25 @@ void vtime_account(struct task_struct *tsk)
>  }
>  EXPORT_SYMBOL_GPL(vtime_account);
>  #endif /* __ARCH_HAS_VTIME_ACCOUNT */
> +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
>  
> -#else
> +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
> +void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
> +{
> +     *ut = p->utime;
> +     *st = p->stime;
> +}
> +
> +void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
> +{
> +     struct task_cputime cputime;
> +
> +     thread_group_cputime(p, &cputime);
> +
> +     *ut = cputime.utime;
> +     *st = cputime.stime;
> +}
> +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
>  
>  #ifndef nsecs_to_cputime
>  # define nsecs_to_cputime(__nsecs)   nsecs_to_jiffies(__nsecs)
> @@ -548,6 +553,12 @@ static void cputime_adjust(struct task_cputime *curr,
>  {
>       cputime_t rtime, utime, total;
>  
> +     if (vtime_accounting()) {
> +             *ut = curr->utime;
> +             *st = curr->stime;
> +             return;
> +     }
> +
>       utime = curr->utime;
>       total = utime + curr->stime;
>  
> @@ -601,7 +612,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
>       thread_group_cputime(p, &cputime);
>       cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
>  }
> -#endif
> +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
>  
>  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
>  static DEFINE_PER_CPU(long, last_jiffies) = INITIAL_JIFFIES;
> @@ -643,6 +654,11 @@ void vtime_account_idle(struct task_struct *tsk)
>       account_idle_time(delta_cpu);
>  }
>  
> +bool vtime_accounting(void)
> +{
> +     return context_tracking_active();
> +}
> +
>  static int __cpuinit vtime_cpu_notify(struct notifier_block *self,
>                                     unsigned long action, void *hcpu)
>  {
> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> index fb8e5e4..ad0e6fa 100644
> --- a/kernel/time/tick-sched.c
> +++ b/kernel/time/tick-sched.c
> @@ -632,8 +632,11 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
>  
>  static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
>  {
> -#ifndef CONFIG_VIRT_CPU_ACCOUNTING
> +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
>       unsigned long ticks;
> +
> +     if (vtime_accounting())
> +             return;
>       /*
>        * We stopped the tick in idle. Update process times would miss the
>        * time we slept as update_process_times does only a 1 tick
> 