Em Thu, Sep 07, 2017 at 10:55:45AM -0700, kan.li...@intel.com escreveu:
> From: Kan Liang <kan.li...@intel.com>
> 
> To process any events, it needs to find the thread in the machine first.
> The machine maintains a rb tree to store all threads. The rb tree is
> protected by a rw lock.
> It is not a problem for current perf, which processes events serially.
> However, it will have a scalability issue when processing events in
> parallel, especially on a heavily loaded system which has many threads.
> 
> Introduce a hashtable to divide the big rb tree into many small rb trees
> for threads. The index is thread id % hashtable size. It can reduce the
> lock contention.

<SNIP>
 
> +++ b/tools/perf/util/machine.h
> @@ -23,6 +23,17 @@ extern const char *ref_reloc_sym_names[];
>  
>  struct vdso_info;
>  
> +#define MACHINE_TH_TABLE_BITS        8
> +#define MACHINE_TH_TABLE_SIZE        (1 << MACHINE_TH_TABLE_BITS)
> +
> +struct machine_th {
> +     struct rb_root    threads;
> +     pthread_rwlock_t  threads_lock;
> +     unsigned int      nr_threads;
> +     struct list_head  dead_threads;
> +     struct thread     *last_match;
> +};
> +

Call it just "threads". Then there is no need for threads->threads; use
threads->entries instead. Likewise, use threads->lock rather than
threads->threads_lock, and threads->deads and threads->nr for the rest.

MACHINE_TH_TABLE_SIZE -> THREADS__TABLE_SIZE, etc.

>  struct machine {
>       struct rb_node    rb_node;
>       pid_t             pid;
> @@ -30,11 +41,7 @@ struct machine {
>       bool              comm_exec;
>       bool              kptr_restrict_warned;
>       char              *root_dir;
> -     struct rb_root    threads;
> -     pthread_rwlock_t  threads_lock;
> -     unsigned int      nr_threads;
> -     struct list_head  dead_threads;
> -     struct thread     *last_match;
> +     struct machine_th threads[MACHINE_TH_TABLE_SIZE];
>       struct vdso_info  *vdso_info;
>       struct perf_env   *env;
>       struct dsos       dsos;
> @@ -49,6 +56,12 @@ struct machine {
>  };
>  
>  static inline
> +struct machine_th *machine_thread(struct machine *machine, pid_t tid)

We separate the class name (machine) from the method name (thread) using
double underscores, i.e. the above becomes:

static inline struct threads *machine__threads(struct machine *machine, pid_t tid)

> +{
> +     return &machine->threads[tid % MACHINE_TH_TABLE_SIZE];
> +}
> +
> +static inline
>  struct map *__machine__kernel_map(struct machine *machine, enum map_type type)
>  {
>       return machine->vmlinux_maps[type];
> diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
> index 808cc45..bbd78fa 100644
> --- a/tools/perf/util/rb_resort.h
> +++ b/tools/perf/util/rb_resort.h
> @@ -143,7 +143,8 @@ struct __name##_sorted *__name = __name##_sorted__new
>                                 __ilist->rblist.nr_entries)
>  
>  /* For 'struct machine->threads' */
> -#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine)                 \
> -     DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads)
> +#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, tid)            \
> +     DECLARE_RESORT_RB(__name)(&__machine->threads[tid].threads,             \
> +                               __machine->threads[tid].nr_threads)
>  
>  #endif /* _PERF_RESORT_RB_H_ */
> -- 
> 2.5.5

Reply via email to