On Tue, Nov 27, 2012 at 07:33:25PM +0100, Frederic Weisbecker wrote:
> Create a new subsystem that probes kernel boundaries to keep track
> of the transitions between context levels, with two basic initial
> contexts: user or kernel.
> 
> This is an abstraction of some RCU code that uses such tracking
> to implement its userspace extended quiescent state.
> 
> We need to pull this up from RCU into this new level of indirection
> because this tracking is also going to be used to implement "on
> demand" generic virtual cputime accounting, a necessary step toward
> shutting down the tick while still accounting the cputime.
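
For reference, the state machine this introduces can be modeled in a few
lines of ordinary user-space C. The sketch below is not kernel code and not
part of the patch: the single static context_tracking variable stands in for
the per-CPU one, the in_irq flag stands in for in_interrupt(), printf() calls
stand in for rcu_user_enter()/rcu_user_exit(), and main() plays the role of a
syscall slow path that brackets kernel work with user_exit()/user_enter().

#include <stdbool.h>
#include <stdio.h>

/* Rough user-space model of kernel/context_tracking.c from the patch below.
 * Everything here is a stand-in: one fake "CPU", a plain in_irq flag instead
 * of in_interrupt(), printouts instead of the RCU calls. */

enum ctx_state { IN_KERNEL = 0, IN_USER };

static struct {
        bool active;            /* when false, user_enter()/user_exit() are no-ops */
        enum ctx_state state;
} context_tracking = { .active = true, .state = IN_KERNEL };

static bool in_irq;             /* stand-in for in_interrupt() */

static void user_enter(void)
{
        if (in_irq)             /* irqs are covered by rcu_irq_enter/exit() */
                return;
        if (context_tracking.active && context_tracking.state != IN_USER) {
                context_tracking.state = IN_USER;
                printf("-> user   (would call rcu_user_enter())\n");
        }
}

static void user_exit(void)
{
        if (in_irq)
                return;
        if (context_tracking.state == IN_USER) {
                context_tracking.state = IN_KERNEL;
                printf("-> kernel (would call rcu_user_exit())\n");
        }
}

int main(void)
{
        user_enter();           /* first return to userspace */
        user_exit();            /* syscall entry: leave user context */
        in_irq = true;
        user_exit();            /* nested call from irq context is ignored */
        in_irq = false;
        user_enter();           /* syscall return: back to user context */
        return 0;
}

Running it prints one transition per direction; the extra user_exit() issued
while in_irq is set is silently ignored, which is exactly the in_interrupt()
early return in the real user_enter()/user_exit() below.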

I have queued this, and if it passes tests and inspection will try
pushing it for 3.8.

                                                        Thanx, Paul

> Signed-off-by: Frederic Weisbecker <fweis...@gmail.com>
> Cc: Andrew Morton <a...@linux-foundation.org>
> Cc: H. Peter Anvin <h...@zytor.com>
> Cc: Ingo Molnar <mi...@kernel.org>
> Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com>
> Cc: Peter Zijlstra <pet...@infradead.org>
> Cc: Steven Rostedt <rost...@goodmis.org>
> Cc: Thomas Gleixner <t...@linutronix.de>
> Cc: Li Zhong <zh...@linux.vnet.ibm.com>
> Cc: Gilad Ben-Yossef <gi...@benyossef.com>
> ---
>  Changes since the last version address Gilad's comments and include ifdef fixes.
>  The CONTEXT_TRACKING_FORCE option has also been moved below the RCU user mode
>  config, as that is its only user for now.
> 
>  arch/Kconfig                                       |   15 ++--
>  arch/x86/Kconfig                                   |    2 +-
>  arch/x86/include/asm/{rcu.h => context_tracking.h} |   15 ++--
>  arch/x86/kernel/entry_64.S                         |    2 +-
>  arch/x86/kernel/ptrace.c                           |    8 +-
>  arch/x86/kernel/signal.c                           |    5 +-
>  arch/x86/kernel/traps.c                            |    2 +-
>  arch/x86/mm/fault.c                                |    2 +-
>  include/linux/context_tracking.h                   |   18 ++++
>  include/linux/rcupdate.h                           |    2 -
>  init/Kconfig                                       |   28 ++++----
>  kernel/Makefile                                    |    1 +
>  kernel/context_tracking.c                          |   83 ++++++++++++++++++++
>  kernel/rcutree.c                                   |   64 +---------------
>  kernel/sched/core.c                                |   11 ++-
>  15 files changed, 150 insertions(+), 108 deletions(-)
>  rename arch/x86/include/asm/{rcu.h => context_tracking.h} (63%)
>  create mode 100644 include/linux/context_tracking.h
>  create mode 100644 kernel/context_tracking.c
> 
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 366ec06..cc74aae 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -300,15 +300,16 @@ config SECCOMP_FILTER
> 
>         See Documentation/prctl/seccomp_filter.txt for details.
> 
> -config HAVE_RCU_USER_QS
> +config HAVE_CONTEXT_TRACKING
>       bool
>       help
> -       Provide kernel entry/exit hooks necessary for userspace
> -       RCU extended quiescent state. Syscalls need to be wrapped inside
> -       rcu_user_exit()-rcu_user_enter() through the slow path using
> -       TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs
> -       are already protected inside rcu_irq_enter/rcu_irq_exit() but
> -       preemption or signal handling on irq exit still need to be protected.
> +       Provide kernel/user boundary probes necessary for subsystems
> +       that need it, such as userspace RCU extended quiescent state.
> +       Syscalls need to be wrapped inside user_exit()-user_enter() through
> +       the slow path using the TIF_NOHZ flag. Exception handlers must be
> +       wrapped as well. Irqs are already protected inside
> +       rcu_irq_enter/rcu_irq_exit() but preemption or signal handling on
> +       irq exit still needs to be protected.
> 
>  config HAVE_VIRT_CPU_ACCOUNTING
>       bool
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 46c3bff..110cfad 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -106,7 +106,7 @@ config X86
>       select KTIME_SCALAR if X86_32
>       select GENERIC_STRNCPY_FROM_USER
>       select GENERIC_STRNLEN_USER
> -     select HAVE_RCU_USER_QS if X86_64
> +     select HAVE_CONTEXT_TRACKING if X86_64
>       select HAVE_IRQ_TIME_ACCOUNTING
>       select GENERIC_KERNEL_THREAD
>       select GENERIC_KERNEL_EXECVE
> diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/context_tracking.h
> similarity index 63%
> rename from arch/x86/include/asm/rcu.h
> rename to arch/x86/include/asm/context_tracking.h
> index d1ac07a..1616562 100644
> --- a/arch/x86/include/asm/rcu.h
> +++ b/arch/x86/include/asm/context_tracking.h
> @@ -1,27 +1,26 @@
> -#ifndef _ASM_X86_RCU_H
> -#define _ASM_X86_RCU_H
> +#ifndef _ASM_X86_CONTEXT_TRACKING_H
> +#define _ASM_X86_CONTEXT_TRACKING_H
> 
>  #ifndef __ASSEMBLY__
> -
> -#include <linux/rcupdate.h>
> +#include <linux/context_tracking.h>
>  #include <asm/ptrace.h>
> 
>  static inline void exception_enter(struct pt_regs *regs)
>  {
> -     rcu_user_exit();
> +     user_exit();
>  }
> 
>  static inline void exception_exit(struct pt_regs *regs)
>  {
> -#ifdef CONFIG_RCU_USER_QS
> +#ifdef CONFIG_CONTEXT_TRACKING
>       if (user_mode(regs))
> -             rcu_user_enter();
> +             user_enter();
>  #endif
>  }
> 
>  #else /* __ASSEMBLY__ */
> 
> -#ifdef CONFIG_RCU_USER_QS
> +#ifdef CONFIG_CONTEXT_TRACKING
>  # define SCHEDULE_USER call schedule_user
>  #else
>  # define SCHEDULE_USER call schedule
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index 0c58952..98faeb3 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -56,7 +56,7 @@
>  #include <asm/ftrace.h>
>  #include <asm/percpu.h>
>  #include <asm/asm.h>
> -#include <asm/rcu.h>
> +#include <asm/context_tracking.h>
>  #include <asm/smap.h>
>  #include <linux/err.h>
> 
> diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
> index eff5b8c..65b88a5 100644
> --- a/arch/x86/kernel/ptrace.c
> +++ b/arch/x86/kernel/ptrace.c
> @@ -21,7 +21,7 @@
>  #include <linux/signal.h>
>  #include <linux/perf_event.h>
>  #include <linux/hw_breakpoint.h>
> -#include <linux/rcupdate.h>
> +#include <linux/context_tracking.h>
> 
>  #include <asm/uaccess.h>
>  #include <asm/pgtable.h>
> @@ -1461,7 +1461,7 @@ long syscall_trace_enter(struct pt_regs *regs)
>  {
>       long ret = 0;
> 
> -     rcu_user_exit();
> +     user_exit();
> 
>       /*
>        * If we stepped into a sysenter/syscall insn, it trapped in
> @@ -1516,7 +1516,7 @@ void syscall_trace_leave(struct pt_regs *regs)
>        * or do_notify_resume(), in which case we can be in RCU
>        * user mode.
>        */
> -     rcu_user_exit();
> +     user_exit();
> 
>       audit_syscall_exit(regs);
> 
> @@ -1534,5 +1534,5 @@ void syscall_trace_leave(struct pt_regs *regs)
>       if (step || test_thread_flag(TIF_SYSCALL_TRACE))
>               tracehook_report_syscall_exit(regs, step);
> 
> -     rcu_user_enter();
> +     user_enter();
>  }
> diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
> index 29ad351..20ecac1 100644
> --- a/arch/x86/kernel/signal.c
> +++ b/arch/x86/kernel/signal.c
> @@ -22,6 +22,7 @@
>  #include <linux/uaccess.h>
>  #include <linux/user-return-notifier.h>
>  #include <linux/uprobes.h>
> +#include <linux/context_tracking.h>
> 
>  #include <asm/processor.h>
>  #include <asm/ucontext.h>
> @@ -816,7 +817,7 @@ static void do_signal(struct pt_regs *regs)
>  void
>  do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
>  {
> -     rcu_user_exit();
> +     user_exit();
> 
>  #ifdef CONFIG_X86_MCE
>       /* notify userspace of pending MCEs */
> @@ -840,7 +841,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
>       if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
>               fire_user_return_notifiers();
> 
> -     rcu_user_enter();
> +     user_enter();
>  }
> 
>  void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index 8276dc6..eb85866 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -55,7 +55,7 @@
>  #include <asm/i387.h>
>  #include <asm/fpu-internal.h>
>  #include <asm/mce.h>
> -#include <asm/rcu.h>
> +#include <asm/context_tracking.h>
> 
>  #include <asm/mach_traps.h>
> 
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index 8e13ecb..b0b1f1d 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -18,7 +18,7 @@
>  #include <asm/pgalloc.h>             /* pgd_*(), ...                 */
>  #include <asm/kmemcheck.h>           /* kmemcheck_*(), ...           */
>  #include <asm/fixmap.h>                      /* VSYSCALL_START               */
> -#include <asm/rcu.h>                 /* exception_enter(), ...       */
> +#include <asm/context_tracking.h>            /* exception_enter(), ...       */
> 
>  /*
>   * Page fault error code bits:
> diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
> new file mode 100644
> index 0000000..e24339c
> --- /dev/null
> +++ b/include/linux/context_tracking.h
> @@ -0,0 +1,18 @@
> +#ifndef _LINUX_CONTEXT_TRACKING_H
> +#define _LINUX_CONTEXT_TRACKING_H
> +
> +#ifdef CONFIG_CONTEXT_TRACKING
> +#include <linux/sched.h>
> +
> +extern void user_enter(void);
> +extern void user_exit(void);
> +extern void context_tracking_task_switch(struct task_struct *prev,
> +                                      struct task_struct *next);
> +#else
> +static inline void user_enter(void) { }
> +static inline void user_exit(void) { }
> +static inline void context_tracking_task_switch(struct task_struct *prev,
> +                                             struct task_struct *next) { }
> +#endif /* !CONFIG_CONTEXT_TRACKING */
> +
> +#endif
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index 8fe7c18..275aa3f 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -222,8 +222,6 @@ extern void rcu_user_enter(void);
>  extern void rcu_user_exit(void);
>  extern void rcu_user_enter_after_irq(void);
>  extern void rcu_user_exit_after_irq(void);
> -extern void rcu_user_hooks_switch(struct task_struct *prev,
> -                               struct task_struct *next);
>  #else
>  static inline void rcu_user_enter(void) { }
>  static inline void rcu_user_exit(void) { }
> diff --git a/init/Kconfig b/init/Kconfig
> index 5ac6ee0..2054e04 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -486,9 +486,13 @@ config PREEMPT_RCU
>         This option enables preemptible-RCU code that is common between
>         the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
> 
> +config CONTEXT_TRACKING
> +       bool
> +
>  config RCU_USER_QS
>       bool "Consider userspace as in RCU extended quiescent state"
> -     depends on HAVE_RCU_USER_QS && SMP
> +     depends on HAVE_CONTEXT_TRACKING && SMP
> +     select CONTEXT_TRACKING
>       help
>         This option sets hooks on kernel / userspace boundaries and
>         puts RCU in extended quiescent state when the CPU runs in
> @@ -497,24 +501,20 @@ config RCU_USER_QS
>         try to keep the timer tick on for RCU.
> 
>         Unless you want to hack and help the development of the full
> -       tickless feature, you shouldn't enable this option.  It also
> +       dynticks mode, you shouldn't enable this option.  It also
>         adds unnecessary overhead.
> 
>         If unsure say N
> 
> -config RCU_USER_QS_FORCE
> -     bool "Force userspace extended QS by default"
> -     depends on RCU_USER_QS
> +config CONTEXT_TRACKING_FORCE
> +     bool "Force context tracking"
> +     depends on CONTEXT_TRACKING
>       help
> -       Set the hooks in user/kernel boundaries by default in order to
> -       test this feature that treats userspace as an extended quiescent
> -       state until we have a real user like a full adaptive nohz option.
> -
> -       Unless you want to hack and help the development of the full
> -       tickless feature, you shouldn't enable this option. It adds
> -       unnecessary overhead.
> -
> -       If unsure say N
> +       Probe user/kernel boundaries by default in order to
> +       test the features that rely on it, such as userspace RCU
> +       extended quiescent states.
> +       This test is there for debugging until we have a real user like the
> +       full dynticks mode.
> 
>  config RCU_FANOUT
>       int "Tree-based hierarchical RCU fanout value"
> diff --git a/kernel/Makefile b/kernel/Makefile
> index 0dfeca4..f90bbfc 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -110,6 +110,7 @@ obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
>  obj-$(CONFIG_PADATA) += padata.o
>  obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
>  obj-$(CONFIG_JUMP_LABEL) += jump_label.o
> +obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
> 
>  $(obj)/configs.o: $(obj)/config_data.h
> 
> diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
> new file mode 100644
> index 0000000..e0e07fd
> --- /dev/null
> +++ b/kernel/context_tracking.c
> @@ -0,0 +1,83 @@
> +#include <linux/context_tracking.h>
> +#include <linux/rcupdate.h>
> +#include <linux/sched.h>
> +#include <linux/percpu.h>
> +#include <linux/hardirq.h>
> +
> +struct context_tracking {
> +     /*
> +      * When active is false, hooks are not set to
> +      * minimize overhead: TIF flags are cleared
> +      * and calls to user_enter/exit are ignored. This
> +      * may be further optimized using static keys.
> +      */
> +     bool active;
> +     enum {
> +             IN_KERNEL = 0,
> +             IN_USER,
> +     } state;
> +};
> +
> +static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
> +#ifdef CONFIG_CONTEXT_TRACKING_FORCE
> +     .active = true,
> +#endif
> +};
> +
> +void user_enter(void)
> +{
> +     unsigned long flags;
> +
> +     /*
> +      * Some contexts may involve an exception occurring in an irq,
> +      * leading to that nesting:
> +      * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
> +      * This would mess up the dyntick_nesting count though. And rcu_irq_*()
> +      * helpers are enough to protect RCU uses inside the exception. So
> +      * just return immediately if we detect we are in an IRQ.
> +      */
> +     if (in_interrupt())
> +             return;
> +
> +     WARN_ON_ONCE(!current->mm);
> +
> +     local_irq_save(flags);
> +     if (__this_cpu_read(context_tracking.active) &&
> +         __this_cpu_read(context_tracking.state) != IN_USER) {
> +             __this_cpu_write(context_tracking.state, IN_USER);
> +             rcu_user_enter();
> +     }
> +     local_irq_restore(flags);
> +}
> +
> +void user_exit(void)
> +{
> +     unsigned long flags;
> +
> +     /*
> +      * Some contexts may involve an exception occurring in an irq,
> +      * leading to that nesting:
> +      * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
> +      * This would mess up the dyntick_nesting count though. And rcu_irq_*()
> +      * helpers are enough to protect RCU uses inside the exception. So
> +      * just return immediately if we detect we are in an IRQ.
> +      */
> +     if (in_interrupt())
> +             return;
> +
> +     local_irq_save(flags);
> +     if (__this_cpu_read(context_tracking.state) == IN_USER) {
> +             __this_cpu_write(context_tracking.state, IN_KERNEL);
> +             rcu_user_exit();
> +     }
> +     local_irq_restore(flags);
> +}
> +
> +void context_tracking_task_switch(struct task_struct *prev,
> +                          struct task_struct *next)
> +{
> +     if (__this_cpu_read(context_tracking.active)) {
> +             clear_tsk_thread_flag(prev, TIF_NOHZ);
> +             set_tsk_thread_flag(next, TIF_NOHZ);
> +     }
> +}
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index 7733eb5..e441b77 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -207,9 +207,6 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
>  DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
>       .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
>       .dynticks = ATOMIC_INIT(1),
> -#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE)
> -     .ignore_user_qs = true,
> -#endif
>  };
> 
>  static long blimit = 10;     /* Maximum callbacks per rcu_do_batch. */
> @@ -420,29 +417,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
>   */
>  void rcu_user_enter(void)
>  {
> -     unsigned long flags;
> -     struct rcu_dynticks *rdtp;
> -
> -     /*
> -      * Some contexts may involve an exception occuring in an irq,
> -      * leading to that nesting:
> -      * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
> -      * This would mess up the dyntick_nesting count though. And rcu_irq_*()
> -      * helpers are enough to protect RCU uses inside the exception. So
> -      * just return immediately if we detect we are in an IRQ.
> -      */
> -     if (in_interrupt())
> -             return;
> -
> -     WARN_ON_ONCE(!current->mm);
> -
> -     local_irq_save(flags);
> -     rdtp = &__get_cpu_var(rcu_dynticks);
> -     if (!rdtp->ignore_user_qs && !rdtp->in_user) {
> -             rdtp->in_user = true;
> -             rcu_eqs_enter(true);
> -     }
> -     local_irq_restore(flags);
> +     rcu_eqs_enter(1);
>  }
> 
>  /**
> @@ -579,27 +554,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
>   */
>  void rcu_user_exit(void)
>  {
> -     unsigned long flags;
> -     struct rcu_dynticks *rdtp;
> -
> -     /*
> -      * Some contexts may involve an exception occuring in an irq,
> -      * leading to that nesting:
> -      * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
> -      * This would mess up the dyntick_nesting count though. And rcu_irq_*()
> -      * helpers are enough to protect RCU uses inside the exception. So
> -      * just return immediately if we detect we are in an IRQ.
> -      */
> -     if (in_interrupt())
> -             return;
> -
> -     local_irq_save(flags);
> -     rdtp = &__get_cpu_var(rcu_dynticks);
> -     if (rdtp->in_user) {
> -             rdtp->in_user = false;
> -             rcu_eqs_exit(true);
> -     }
> -     local_irq_restore(flags);
> +     rcu_eqs_exit(1);
>  }
> 
>  /**
> @@ -722,21 +677,6 @@ int rcu_is_cpu_idle(void)
>  }
>  EXPORT_SYMBOL(rcu_is_cpu_idle);
> 
> -#ifdef CONFIG_RCU_USER_QS
> -void rcu_user_hooks_switch(struct task_struct *prev,
> -                        struct task_struct *next)
> -{
> -     struct rcu_dynticks *rdtp;
> -
> -     /* Interrupts are disabled in context switch */
> -     rdtp = &__get_cpu_var(rcu_dynticks);
> -     if (!rdtp->ignore_user_qs) {
> -             clear_tsk_thread_flag(prev, TIF_NOHZ);
> -             set_tsk_thread_flag(next, TIF_NOHZ);
> -     }
> -}
> -#endif /* #ifdef CONFIG_RCU_USER_QS */
> -
>  #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
> 
>  /*
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 36f2608..80f80df 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -72,6 +72,7 @@
>  #include <linux/slab.h>
>  #include <linux/init_task.h>
>  #include <linux/binfmts.h>
> +#include <linux/context_tracking.h>
> 
>  #include <asm/switch_to.h>
>  #include <asm/tlb.h>
> @@ -1886,8 +1887,8 @@ context_switch(struct rq *rq, struct task_struct *prev,
>       spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
>  #endif
> 
> +     context_tracking_task_switch(prev, next);
>       /* Here we just switch the register state and the stack. */
> -     rcu_user_hooks_switch(prev, next);
>       switch_to(prev, next, prev);
> 
>       barrier();
> @@ -2911,7 +2912,7 @@ asmlinkage void __sched schedule(void)
>  }
>  EXPORT_SYMBOL(schedule);
> 
> -#ifdef CONFIG_RCU_USER_QS
> +#ifdef CONFIG_CONTEXT_TRACKING
>  asmlinkage void __sched schedule_user(void)
>  {
>       /*
> @@ -2920,9 +2921,9 @@ asmlinkage void __sched schedule_user(void)
>        * we haven't yet exited the RCU idle mode. Do it here manually until
>        * we find a better solution.
>        */
> -     rcu_user_exit();
> +     user_exit();
>       schedule();
> -     rcu_user_enter();
> +     user_enter();
>  }
>  #endif
> 
> @@ -3027,7 +3028,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
>       /* Catch callers which need to be fixed */
>       BUG_ON(ti->preempt_count || !irqs_disabled());
> 
> -     rcu_user_exit();
> +     user_exit();
>       do {
>               add_preempt_count(PREEMPT_ACTIVE);
>               local_irq_enable();
> -- 
> 1.7.5.4
> 
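
To close, here is a similarly rough user-space model of the
exception_enter()/exception_exit() wrappers kept in the renamed
arch/x86/include/asm/context_tracking.h above. Again, nothing here is kernel
code: struct pt_regs is shrunk to a single user_mode flag,
user_enter()/user_exit() are reduced to printouts, and handle_fault() is a
hypothetical handler. It only illustrates why exception_exit() checks
user_mode(regs): tracking is resumed only when the exception interrupted
user code.

#include <stdbool.h>
#include <stdio.h>

/* Standalone model (not kernel code) of the exception_enter()/exception_exit()
 * pattern from the patch above. */

struct pt_regs { bool user_mode; };    /* stand-in for the real pt_regs */

static void user_enter(void) { printf("  -> user context\n"); }
static void user_exit(void)  { printf("  -> kernel context\n"); }

static void exception_enter(struct pt_regs *regs)
{
        (void)regs;
        user_exit();                   /* the CPU is in the kernel now */
}

static void exception_exit(struct pt_regs *regs)
{
        if (regs->user_mode)           /* resume user tracking only if the */
                user_enter();          /* exception interrupted user code  */
}

static void handle_fault(struct pt_regs *regs)
{
        exception_enter(regs);
        printf("handling fault (interrupted %s code)\n",
               regs->user_mode ? "user" : "kernel");
        exception_exit(regs);
}

int main(void)
{
        struct pt_regs from_user   = { .user_mode = true  };
        struct pt_regs from_kernel = { .user_mode = false };

        handle_fault(&from_user);      /* e.g. page fault on a user access */
        handle_fault(&from_kernel);    /* fault taken while already in kernel */
        return 0;
}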
