* Paul E. McKenney ([email protected]) wrote:
> On Mon, Jun 06, 2011 at 04:47:48PM -0400, Mathieu Desnoyers wrote:
> > * Paul E. McKenney ([email protected]) wrote:
> > > On Mon, Jun 06, 2011 at 01:45:59PM -0400, Mathieu Desnoyers wrote:
> > > > I played a bit with the call_rcu() implementation alongside my
> > > > rbtree tests, and noticed the following:
> > > > 
> > > > If I use per-cpu call_rcu threads with URCU_CALL_RCU_RT flag, with one
> > > > updater thread only for my rbtree (no reader), I get 38365 updates/s.
> > > > If I add cpu affinity to these per-cpu call_rcu threads (I have prepared
> > > > a patch that does this), it jumps to 54219 updates/s.  So it looks like
> > > > keeping per-cpu affinity for the call_rcu thread is a good thing.
> > > 
> > > Makes sense to me!
> > > 
> > > Main comment is that API.h should also be updated.
> > 
> > I updated API.txt. What is API.h ?
> 
> API.h is a typo on my part, thank you for your correct interpretation!

OK, patch merged as-is.

Thanks,

Mathieu

> 
>                                                       Thanx, Paul
> 
> > Mathieu
> > 
> > > 
> > >                                                   Thanx, Paul
> > > 
> > > > Signed-off-by: Mathieu Desnoyers <[email protected]>
> > > > ---
> > > >  API.txt              |    7 +++++--
> > > >  tests/rcutorture.h   |    4 ++--
> > > >  urcu-call-rcu-impl.h |   51 ++++++++++++++++++++++++++++++++++++++++++++-------
> > > >  urcu-call-rcu.h      |    3 ++-
> > > >  urcu-qsbr.c          |    1 +
> > > >  urcu.c               |    1 +
> > > >  6 files changed, 55 insertions(+), 12 deletions(-)
> > > > 
> > > > Index: userspace-rcu/API.txt
> > > > ===================================================================
> > > > --- userspace-rcu.orig/API.txt
> > > > +++ userspace-rcu/API.txt
> > > > @@ -59,12 +59,15 @@ void call_rcu(struct rcu_head *head,
> > > > 
> > > >                 call_rcu(&p->rcu, func);
> > > > 
> > > > -struct call_rcu_data *create_call_rcu_data(unsigned long flags);
> > > > +struct call_rcu_data *create_call_rcu_data(unsigned long flags,
> > > > +                                          int cpu_affinity);
> > > > 
> > > >         Returns a handle that can be passed to the following
> > > >         primitives.  The "flags" argument can be zero, or can be
> > > >         URCU_CALL_RCU_RT if the worker threads associated with the
> > > > -       new helper thread are to get real-time response.
> > > > +       new helper thread are to get real-time response. The argument
> > > > +       "cpu_affinity" specifies a cpu on which the call_rcu thread should
> > > > +       be affined to. It is ignored if negative.
> > > > 
> > > >  struct call_rcu_data *get_default_call_rcu_data(void);
> > > > 
> > > > Index: userspace-rcu/tests/rcutorture.h
> > > > ===================================================================
> > > > --- userspace-rcu.orig/tests/rcutorture.h
> > > > +++ userspace-rcu/tests/rcutorture.h
> > > > @@ -156,7 +156,7 @@ void *rcu_update_perf_test(void *arg)
> > > >         if ((random() & 0xf00) == 0) {
> > > >                 struct call_rcu_data *crdp;
> > > > 
> > > > -               crdp = create_call_rcu_data(0);
> > > > +               crdp = create_call_rcu_data(0, -1);
> > > >                 if (crdp != NULL) {
> > > >                         fprintf(stderr,
> > > >                                 "Using per-thread call_rcu() 
> > > > worker.\n");
> > > > @@ -385,7 +385,7 @@ void *rcu_fake_update_stress_test(void *
> > > >         if ((random() & 0xf00) == 0) {
> > > >                 struct call_rcu_data *crdp;
> > > > 
> > > > -               crdp = create_call_rcu_data(0);
> > > > +               crdp = create_call_rcu_data(0, -1);
> > > >                 if (crdp != NULL) {
> > > >                         fprintf(stderr,
> > > >                                 "Using per-thread call_rcu() 
> > > > worker.\n");
> > > > Index: userspace-rcu/urcu-call-rcu-impl.h
> > > > ===================================================================
> > > > --- userspace-rcu.orig/urcu-call-rcu-impl.h
> > > > +++ userspace-rcu/urcu-call-rcu-impl.h
> > > > @@ -20,6 +20,7 @@
> > > >   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
> > > > 02110-1301 USA
> > > >   */
> > > > 
> > > > +#define _GNU_SOURCE
> > > >  #include <stdio.h>
> > > >  #include <pthread.h>
> > > >  #include <signal.h>
> > > > @@ -31,6 +32,7 @@
> > > >  #include <sys/time.h>
> > > >  #include <syscall.h>
> > > >  #include <unistd.h>
> > > > +#include <sched.h>
> > > > 
> > > >  #include "config.h"
> > > >  #include "urcu/wfqueue.h"
> > > > @@ -47,6 +49,7 @@ struct call_rcu_data {
> > > >         pthread_cond_t cond;
> > > >         unsigned long qlen;
> > > >         pthread_t tid;
> > > > +       int cpu_affinity;
> > > >         struct cds_list_head list;
> > > >  } __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
> > > > 
> > > > @@ -146,6 +149,31 @@ static void call_rcu_unlock(pthread_mute
> > > >         }
> > > >  }
> > > > 
> > > > +#if HAVE_SCHED_SETAFFINITY
> > > > +static
> > > > +int set_thread_cpu_affinity(struct call_rcu_data *crdp)
> > > > +{
> > > > +       cpu_set_t mask;
> > > > +
> > > > +       if (crdp->cpu_affinity < 0)
> > > > +               return 0;
> > > > +
> > > > +       CPU_ZERO(&mask);
> > > > +       CPU_SET(crdp->cpu_affinity, &mask);
> > > > +#if SCHED_SETAFFINITY_ARGS == 2
> > > > +       return sched_setaffinity(0, &mask);
> > > > +#else
> > > > +       return sched_setaffinity(0, sizeof(mask), &mask);
> > > > +#endif
> > > > +}
> > > > +#else
> > > > +static
> > > > +int set_thread_cpu_affinity(struct call_rcu_data *crdp)
> > > > +{
> > > > +       return 0;
> > > > +}
> > > > +#endif
> > > > +
> > > >  /* This is the code run by each call_rcu thread. */
> > > > 
> > > >  static void *call_rcu_thread(void *arg)
> > > > @@ -156,6 +184,11 @@ static void *call_rcu_thread(void *arg)
> > > >         struct call_rcu_data *crdp = (struct call_rcu_data *)arg;
> > > >         struct rcu_head *rhp;
> > > > 
> > > > +       if (set_thread_cpu_affinity(crdp) != 0) {
> > > > +               perror("pthread_setaffinity_np");
> > > > +               exit(-1);
> > > > +       }
> > > > +
> > > >         thread_call_rcu_data = crdp;
> > > >         for (;;) {
> > > >                 if (&crdp->cbs.head != 
> > > > _CMM_LOAD_SHARED(crdp->cbs.tail)) {
> > > > @@ -214,7 +247,8 @@ static void *call_rcu_thread(void *arg)
> > > >   */
> > > > 
> > > >  static void call_rcu_data_init(struct call_rcu_data **crdpp,
> > > > -                              unsigned long flags)
> > > > +                              unsigned long flags,
> > > > +                              int cpu_affinity)
> > > >  {
> > > >         struct call_rcu_data *crdp;
> > > > 
> > > > @@ -236,6 +270,7 @@ static void call_rcu_data_init(struct ca
> > > >         }
> > > >         crdp->flags = flags | URCU_CALL_RCU_RUNNING;
> > > >         cds_list_add(&crdp->list, &call_rcu_data_list);
> > > > +       crdp->cpu_affinity = cpu_affinity;
> > > >         cmm_smp_mb();  /* Structure initialized before pointer is 
> > > > planted. */
> > > >         *crdpp = crdp;
> > > >         if (pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp) != 
> > > > 0) {
> > > > @@ -280,20 +315,22 @@ pthread_t get_call_rcu_thread(struct cal
> > > >   * Create a call_rcu_data structure (with thread) and return a pointer.
> > > >   */
> > > > 
> > > > -static struct call_rcu_data *__create_call_rcu_data(unsigned long 
> > > > flags)
> > > > +static struct call_rcu_data *__create_call_rcu_data(unsigned long 
> > > > flags,
> > > > +                                                   int cpu_affinity)
> > > >  {
> > > >         struct call_rcu_data *crdp;
> > > > 
> > > > -       call_rcu_data_init(&crdp, flags);
> > > > +       call_rcu_data_init(&crdp, flags, cpu_affinity);
> > > >         return crdp;
> > > >  }
> > > > 
> > > > -struct call_rcu_data *create_call_rcu_data(unsigned long flags)
> > > > +struct call_rcu_data *create_call_rcu_data(unsigned long flags,
> > > > +                                          int cpu_affinity)
> > > >  {
> > > >         struct call_rcu_data *crdp;
> > > > 
> > > >         call_rcu_lock(&call_rcu_mutex);
> > > > -       crdp = __create_call_rcu_data(flags);
> > > > +       crdp = __create_call_rcu_data(flags, cpu_affinity);
> > > >         call_rcu_unlock(&call_rcu_mutex);
> > > >         return crdp;
> > > >  }
> > > > @@ -346,7 +383,7 @@ struct call_rcu_data *get_default_call_r
> > > >                 call_rcu_unlock(&call_rcu_mutex);
> > > >                 return default_call_rcu_data;
> > > >         }
> > > > -       call_rcu_data_init(&default_call_rcu_data, 0);
> > > > +       call_rcu_data_init(&default_call_rcu_data, 0, -1);
> > > >         call_rcu_unlock(&call_rcu_mutex);
> > > >         return default_call_rcu_data;
> > > >  }
> > > > @@ -434,7 +471,7 @@ int create_all_cpu_call_rcu_data(unsigne
> > > >                         call_rcu_unlock(&call_rcu_mutex);
> > > >                         continue;
> > > >                 }
> > > > -               crdp = __create_call_rcu_data(flags);
> > > > +               crdp = __create_call_rcu_data(flags, i);
> > > >                 if (crdp == NULL) {
> > > >                         call_rcu_unlock(&call_rcu_mutex);
> > > >                         errno = ENOMEM;
> > > > Index: userspace-rcu/urcu-call-rcu.h
> > > > ===================================================================
> > > > --- userspace-rcu.orig/urcu-call-rcu.h
> > > > +++ userspace-rcu/urcu-call-rcu.h
> > > > @@ -64,7 +64,8 @@ struct rcu_head {
> > > >   */
> > > >  struct call_rcu_data *get_cpu_call_rcu_data(int cpu);
> > > >  pthread_t get_call_rcu_thread(struct call_rcu_data *crdp);
> > > > -struct call_rcu_data *create_call_rcu_data(unsigned long flags);
> > > > +struct call_rcu_data *create_call_rcu_data(unsigned long flags,
> > > > +                                          int cpu_affinity);
> > > >  int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp);
> > > >  struct call_rcu_data *get_default_call_rcu_data(void);
> > > >  struct call_rcu_data *get_call_rcu_data(void);
> > > > Index: userspace-rcu/urcu-qsbr.c
> > > > ===================================================================
> > > > --- userspace-rcu.orig/urcu-qsbr.c
> > > > +++ userspace-rcu/urcu-qsbr.c
> > > > @@ -23,6 +23,7 @@
> > > >   * IBM's contributions to this file may be relicensed under LGPLv2 or 
> > > > later.
> > > >   */
> > > > 
> > > > +#define _GNU_SOURCE
> > > >  #include <stdio.h>
> > > >  #include <pthread.h>
> > > >  #include <signal.h>
> > > > Index: userspace-rcu/urcu.c
> > > > ===================================================================
> > > > --- userspace-rcu.orig/urcu.c
> > > > +++ userspace-rcu/urcu.c
> > > > @@ -24,6 +24,7 @@
> > > >   */
> > > > 
> > > >  #define _BSD_SOURCE
> > > > +#define _GNU_SOURCE
> > > >  #include <stdio.h>
> > > >  #include <pthread.h>
> > > >  #include <signal.h>
> > > > 
> > 
> > -- 
> > Mathieu Desnoyers
> > Operating System Efficiency R&D Consultant
> > EfficiOS Inc.
> > http://www.efficios.com

-- 
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com

_______________________________________________
ltt-dev mailing list
[email protected]
http://lists.casi.polymtl.ca/cgi-bin/mailman/listinfo/ltt-dev

Reply via email to