On Sat, Aug 31, 2013 at 01:11:18AM -0400, Steven Rostedt wrote:
> From: "Steven Rostedt (Red Hat)" <rost...@goodmis.org>
> 
> Some ftrace function tracing callbacks use RCU (perf), thus if
> it gets called from tracing a function outside of the RCU tracking,
> like in entering or leaving NO_HZ idle/userspace, the RCU read locks
> in the callback are useless.
> 
> As normal function tracing does not use RCU, and function tracing
> happens to help debug RCU, we do not want to limit all callbacks
> from tracing these "unsafe" RCU functions. Instead, we create an
> infrastructure that will let us mark these unsafe RCU functions
> and we will only allow callbacks that explicitly say they do not
> use RCU to be called by these functions.
> 
> This patch adds the infrastructure to create the hash of functions
> that are RCU unsafe. This is done with the FTRACE_UNSAFE_RCU()
> macro. It works like EXPORT_SYMBOL() does. Simply place the macro
> under the RCU unsafe function:
> 
> void func(void)
> {
>       [...]
> }
> FTRACE_UNSAFE_RCU(func);
> 
> Cc: Jiri Olsa <jo...@redhat.com>
> Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com>
> Signed-off-by: Steven Rostedt <rost...@goodmis.org>

Acked-by: Paul E. McKenney <paul...@linux.vnet.ibm.com>

> ---
>  include/asm-generic/vmlinux.lds.h |   10 +++
>  include/linux/ftrace.h            |   38 +++++++++++
>  kernel/trace/ftrace.c             |  135 ++++++++++++++++++++++++++++++++++---
>  3 files changed, 174 insertions(+), 9 deletions(-)
> 
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index 69732d2..fdfddd2 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -93,6 +93,15 @@
>  #define MCOUNT_REC()
>  #endif
> 
> +#ifdef CONFIG_FUNCTION_TRACER
> +#define FTRACE_UNSAFE_RCU()  . = ALIGN(8);                           \
> +                     VMLINUX_SYMBOL(__start_ftrace_unsafe_rcu) = .;  \
> +                     *(_ftrace_unsafe_rcu)                           \
> +                     VMLINUX_SYMBOL(__stop_ftrace_unsafe_rcu) = .;
> +#else
> +#define FTRACE_UNSAFE_RCU()
> +#endif
> +
>  #ifdef CONFIG_TRACE_BRANCH_PROFILING
>  #define LIKELY_PROFILE()     VMLINUX_SYMBOL(__start_annotated_branch_profile) = .; \
>                               *(_ftrace_annotated_branch)                             \
> @@ -479,6 +488,7 @@
>       MEM_DISCARD(init.data)                                          \
>       KERNEL_CTORS()                                                  \
>       MCOUNT_REC()                                                    \
> +     FTRACE_UNSAFE_RCU()                                             \
>       *(.init.rodata)                                                 \
>       FTRACE_EVENTS()                                                 \
>       TRACE_SYSCALLS()                                                \
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 9f15c00..1d17a82 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -92,6 +92,9 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned 
> long parent_ip,
>   * STUB   - The ftrace_ops is just a place holder.
>   * INITIALIZED - The ftrace_ops has already been initialized (first use time
>   *            register_ftrace_function() is called, it will initialized the 
> ops)
> + * RCU_SAFE - The callback uses no rcu type locking. This allows the
> + *            callback to be called in locations that RCU is not active.
> + *            (like going into or coming out of idle NO_HZ)
>   */
>  enum {
>       FTRACE_OPS_FL_ENABLED                   = 1 << 0,
> @@ -103,6 +106,7 @@ enum {
>       FTRACE_OPS_FL_RECURSION_SAFE            = 1 << 6,
>       FTRACE_OPS_FL_STUB                      = 1 << 7,
>       FTRACE_OPS_FL_INITIALIZED               = 1 << 8,
> +     FTRACE_OPS_FL_RCU_SAFE                  = 1 << 9,
>  };
> 
>  struct ftrace_ops {
> @@ -219,6 +223,40 @@ static inline int ftrace_function_local_disabled(struct 
> ftrace_ops *ops)
>  extern void ftrace_stub(unsigned long a0, unsigned long a1,
>                       struct ftrace_ops *op, struct pt_regs *regs);
> 
> +/*
> + * In order to speed up boot, save both the name and the
> + * address of the function to find to add to hashes. The
> + * list of function mcount addresses are sorted by the address,
> + * but we still need to use the name to find the actual location
> + * as the mcount call is usually not at the address of the
> + * start of the function.
> + */
> +struct ftrace_func_finder {
> +     const char              *name;
> +     unsigned long           ip;
> +};
> +
> +/*
> + * For functions that are called when RCU is not tracking the CPU
> + * (like for entering or leaving NO_HZ mode, and RCU then ignores
> + * the CPU), they need to be marked with FTRACE_UNSAFE_RCU().
> + * This will prevent all function tracing callbacks from calling
> + * this function unless the callback explicitly states that it
> + * doesn't use RCU with the FTRACE_OPS_FL_RCU_SAFE flag.
> + *
> + * The usage of FTRACE_UNSAFE_RCU() is the same as EXPORT_SYMBOL().
> + * Just place the macro underneath the function that is unsafe.
> + */
> +#define FTRACE_UNSAFE_RCU(func) \
> +     static struct ftrace_func_finder __ftrace_unsafe_rcu_##func     \
> +      __initdata = {                                                 \
> +             .name = #func,                                          \
> +             .ip = (unsigned long)func,                              \
> +     };                                                      \
> +     struct ftrace_func_finder *__ftrace_unsafe_rcu_ptr_##func       \
> +               __attribute__((__section__("_ftrace_unsafe_rcu"))) =  \
> +             &__ftrace_unsafe_rcu_##func
> +
>  #else /* !CONFIG_FUNCTION_TRACER */
>  /*
>   * (un)register_ftrace_function must be a macro since the ops parameter
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index a6d098c..3750360 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1356,6 +1356,23 @@ ftrace_hash_rec_disable(struct ftrace_ops *ops, int 
> filter_hash);
>  static void
>  ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash);
> 
> +static int ftrace_convert_size_to_bits(int size)
> +{
> +     int bits = 0;
> +
> +     /*
> +      * Make the hash size about 1/2 the # found
> +      */
> +     for (size /= 2; size; size >>= 1)
> +             bits++;
> +
> +     /* Don't allocate too much */
> +     if (bits > FTRACE_HASH_MAX_BITS)
> +             bits = FTRACE_HASH_MAX_BITS;
> +
> +     return bits;
> +}
> +
>  static int
>  ftrace_hash_move(struct ftrace_ops *ops, int enable,
>                struct ftrace_hash **dst, struct ftrace_hash *src)
> @@ -1388,15 +1405,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
>               goto out;
>       }
> 
> -     /*
> -      * Make the hash size about 1/2 the # found
> -      */
> -     for (size /= 2; size; size >>= 1)
> -             bits++;
> -
> -     /* Don't allocate too much */
> -     if (bits > FTRACE_HASH_MAX_BITS)
> -             bits = FTRACE_HASH_MAX_BITS;
> +     bits = ftrace_convert_size_to_bits(size);
> 
>       ret = -ENOMEM;
>       new_hash = alloc_ftrace_hash(bits);
> @@ -1486,6 +1495,74 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long 
> ip, void *regs)
>       }
> 
> 
> +static __init int ftrace_find_ip(const void *a, const void *b)
> +{
> +     const struct dyn_ftrace *key = a;
> +     const struct dyn_ftrace *rec = b;
> +
> +     if (key->ip == rec->ip)
> +             return 0;
> +
> +     if (key->ip < rec->ip) {
> +             /* If previous is less than, then this is our func */
> +             rec--;
> +             if (rec->ip < key->ip)
> +                     return 0;
> +             return -1;
> +     }
> +
> +     /* All else, we are greater */
> +     return 1;
> +}
> +
> +/*
> + * Find the mcount caller location given the ip address of the
> + * function that contains it. As the mcount caller is usually
> + * after the mcount itself.
> + *
> + * Done for just core functions at boot.
> + */
> +static __init unsigned long ftrace_mcount_from_func(unsigned long ip)
> +{
> +     struct ftrace_page *pg, *last_pg = NULL;
> +     struct dyn_ftrace *rec;
> +     struct dyn_ftrace key;
> +
> +     key.ip = ip;
> +
> +     for (pg = ftrace_pages_start; pg; last_pg = pg, pg = pg->next) {
> +             if (ip >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
> +                     continue;
> +
> +             /*
> +              * Do the first record manually, as we need to check
> +              * the previous record from the previous page
> +              */
> +             if (ip < pg->records[0].ip) {
> +                     /* First page? Then we found our record! */
> +                     if (!last_pg)
> +                             return pg->records[0].ip;
> +
> +                     rec = &last_pg->records[last_pg->index - 1];
> +                     if (rec->ip < ip)
> +                             return pg->records[0].ip;
> +
> +                     /* Not found */
> +                     return 0;
> +             }
> +
> +             rec = bsearch(&key, pg->records + 1, pg->index - 1,
> +                           sizeof(struct dyn_ftrace),
> +                           ftrace_find_ip);
> +             if (rec)
> +                     return rec->ip;
> +
> +             break;
> +     }
> +
> +     return 0;
> +}
> +
>  static int ftrace_cmp_recs(const void *a, const void *b)
>  {
>       const struct dyn_ftrace *key = a;
> @@ -4211,6 +4288,44 @@ struct notifier_block ftrace_module_exit_nb = {
>       .priority = INT_MIN,    /* Run after anything that can remove kprobes */
>  };
> 
> +extern struct ftrace_func_finder *__start_ftrace_unsafe_rcu[];
> +extern struct ftrace_func_finder *__stop_ftrace_unsafe_rcu[];
> +
> +static struct ftrace_hash *ftrace_unsafe_rcu;
> +
> +static void __init create_unsafe_rcu_hash(void)
> +{
> +     struct ftrace_func_finder *finder;
> +     struct ftrace_func_finder **p;
> +     unsigned long ip;
> +     char str[KSYM_SYMBOL_LEN];
> +     int count;
> +
> +     count = __stop_ftrace_unsafe_rcu - __start_ftrace_unsafe_rcu;
> +     if (!count)
> +             return;
> +
> +     count = ftrace_convert_size_to_bits(count);
> +     ftrace_unsafe_rcu = alloc_ftrace_hash(count);
> +     if (!ftrace_unsafe_rcu)
> +             return;
> +
> +     for (p = __start_ftrace_unsafe_rcu;
> +          p < __stop_ftrace_unsafe_rcu;
> +          p++) {
> +             finder = *p;
> +
> +             ip = ftrace_mcount_from_func(finder->ip);
> +             if (WARN_ON_ONCE(!ip))
> +                     continue;
> +
> +             /* Make sure this is our ip */
> +             kallsyms_lookup(ip, NULL, NULL, NULL, str);
> +             if (WARN_ON_ONCE(strcmp(str, finder->name) != 0))
> +                     continue;
> +
> +             add_hash_entry(ftrace_unsafe_rcu, ip);
> +     }
> +}
> +
>  extern unsigned long __start_mcount_loc[];
>  extern unsigned long __stop_mcount_loc[];
> 
> @@ -4250,6 +4365,8 @@ void __init ftrace_init(void)
>       if (ret)
>               pr_warning("Failed to register trace ftrace module exit 
> notifier\n");
> 
> +     create_unsafe_rcu_hash();
> +
>       set_ftrace_early_filters();
> 
>       return;
> -- 
> 1.7.10.4
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to