Em Thu, Sep 07, 2017 at 10:55:45AM -0700, kan.li...@intel.com escreveu: > From: Kan Liang <kan.li...@intel.com> > > To process any events, it needs to find the thread in the machine first. > The machine maintains a rb tree to store all threads. The rb tree is > protected by a rw lock. > It is not a problem for current perf, which processes events serially. > However, it will have a scalability performance issue when processing events in > parallel, especially on a heavily loaded system which has many threads. > > Introduce a hashtable to divide the big rb tree into many small rb trees > for threads. The index is thread id % hashtable size. It can reduce the > lock contention.
<SNIP> > +++ b/tools/perf/util/machine.h > @@ -23,6 +23,17 @@ extern const char *ref_reloc_sym_names[]; > > struct vdso_info; > > +#define MACHINE_TH_TABLE_BITS 8 > +#define MACHINE_TH_TABLE_SIZE (1 << MACHINE_TH_TABLE_BITS) > + > +struct machine_th { > + struct rb_root threads; > + pthread_rwlock_t threads_lock; > + unsigned int nr_threads; > + struct list_head dead_threads; > + struct thread *last_match; > +}; > + Call the struct just "threads"; then there is no need for threads->threads, use threads->entries instead; likewise not threads->threads_lock but threads->lock, and threads->deads, threads->nr. Rename MACHINE_TH_TABLE_SIZE -> THREADS__TABLE_SIZE, etc. > struct machine { > struct rb_node rb_node; > pid_t pid; > @@ -30,11 +41,7 @@ struct machine { > bool comm_exec; > bool kptr_restrict_warned; > char *root_dir; > - struct rb_root threads; > - pthread_rwlock_t threads_lock; > - unsigned int nr_threads; > - struct list_head dead_threads; > - struct thread *last_match; > + struct machine_th threads[MACHINE_TH_TABLE_SIZE]; > struct vdso_info *vdso_info; > struct perf_env *env; > struct dsos dsos; > @@ -49,6 +56,12 @@ struct machine { > }; > > static inline > +struct machine_th *machine_thread(struct machine *machine, pid_t tid) We separate the class name (machine) from the method name (thread) using double underscores, i.e. 
the above becomes: static inline struct threads *machine__threads(struct machine *machine, pid_t tid) > +{ > + return &machine->threads[tid % MACHINE_TH_TABLE_SIZE]; > +} > + > +static inline > struct map *__machine__kernel_map(struct machine *machine, enum map_type > type) > { > return machine->vmlinux_maps[type]; > diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h > index 808cc45..bbd78fa 100644 > --- a/tools/perf/util/rb_resort.h > +++ b/tools/perf/util/rb_resort.h > @@ -143,7 +143,8 @@ struct __name##_sorted *__name = __name##_sorted__new > __ilist->rblist.nr_entries) > > /* For 'struct machine->threads' */ > -#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) > \ > - DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads) > +#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, tid) > \ > + DECLARE_RESORT_RB(__name)(&__machine->threads[tid].threads, > \ > + __machine->threads[tid].nr_threads) > > #endif /* _PERF_RESORT_RB_H_ */ > -- > 2.5.5