On Mon, Nov 28, 2011 at 08:09:01PM +1030, Alan Modra wrote:
> On Mon, Nov 28, 2011 at 09:13:27AM +0100, Jakub Jelinek wrote:
> > On Mon, Nov 28, 2011 at 10:12:09AM +1030, Alan Modra wrote:
> > > --- libgomp/config/linux/bar.c    (revision 181718)
> > > +++ libgomp/config/linux/bar.c    (working copy)
> > > @@ -36,18 +36,15 @@ gomp_barrier_wait_end (gomp_barrier_t *b
> > >    if (__builtin_expect ((state & 1) != 0, 0))
> > >      {
> > >        /* Next time we'll be awaiting TOTAL threads again.  */
> > > -      bar->awaited = bar->total;
> > > -      atomic_write_barrier ();
> > > -      bar->generation += 4;
> > > +      __atomic_store_4 (&bar->awaited, bar->total, MEMMODEL_RELEASE);
> > > +      __atomic_add_fetch (&bar->generation, 4, MEMMODEL_ACQ_REL);
> > >        futex_wake ((int *) &bar->generation, INT_MAX);
> > >      }
> > 
> > The above is unfortunate, bar->generation should be only modified from a
> > single thread at this point, but the __atomic_add_fetch will enforce there
> > an atomic instruction on it rather than just some kind of barrier.
> 
> I will try without the atomic add and see what happens.

Seems to be fine.  I took out the extra write barriers too, so we now
just have two MEMMODEL_RELEASE atomic store barriers replacing the two
old atomic_write_barriers, and a number of new acquire barriers.

        PR libgomp/51298
        * config/linux/bar.h: Use atomic rather than sync builtins.
        * config/linux/bar.c: Likewise.  Add missing acquire
        synchronisation on generation field.
        * task.c (gomp_barrier_handle_tasks): Regain lock so as to not
        double unlock.

Index: libgomp/task.c
===================================================================
--- libgomp/task.c      (revision 181718)
+++ libgomp/task.c      (working copy)
@@ -273,6 +273,7 @@ gomp_barrier_handle_tasks (gomp_barrier_
              gomp_team_barrier_done (&team->barrier, state);
              gomp_mutex_unlock (&team->task_lock);
              gomp_team_barrier_wake (&team->barrier, 0);
+             gomp_mutex_lock (&team->task_lock);
            }
        }
     }
Index: libgomp/config/linux/bar.h
===================================================================
--- libgomp/config/linux/bar.h  (revision 181770)
+++ libgomp/config/linux/bar.h  (working copy)
@@ -50,7 +50,7 @@ static inline void gomp_barrier_init (go
 
 static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count)
 {
-  __sync_fetch_and_add (&bar->awaited, count - bar->total);
+  __atomic_add_fetch (&bar->awaited, count - bar->total, MEMMODEL_ACQ_REL);
   bar->total = count;
 }
 
@@ -69,10 +69,8 @@ extern void gomp_team_barrier_wake (gomp
 static inline gomp_barrier_state_t
 gomp_barrier_wait_start (gomp_barrier_t *bar)
 {
-  unsigned int ret = bar->generation & ~3;
-  /* Do we need any barrier here or is __sync_add_and_fetch acting
-     as the needed LoadLoad barrier already?  */
-  ret += __sync_add_and_fetch (&bar->awaited, -1) == 0;
+  unsigned int ret = __atomic_load_4 (&bar->generation, MEMMODEL_ACQUIRE) & ~3;
+  ret += __atomic_add_fetch (&bar->awaited, -1, MEMMODEL_ACQ_REL) == 0;
   return ret;
 }
 
Index: libgomp/config/linux/bar.c
===================================================================
--- libgomp/config/linux/bar.c  (revision 181770)
+++ libgomp/config/linux/bar.c  (working copy)
@@ -37,17 +37,15 @@ gomp_barrier_wait_end (gomp_barrier_t *b
     {
       /* Next time we'll be awaiting TOTAL threads again.  */
       bar->awaited = bar->total;
-      atomic_write_barrier ();
-      bar->generation += 4;
+      __atomic_store_4 (&bar->generation, bar->generation + 4,
+                       MEMMODEL_RELEASE);
       futex_wake ((int *) &bar->generation, INT_MAX);
     }
   else
     {
-      unsigned int generation = state;
-
       do
-       do_wait ((int *) &bar->generation, generation);
-      while (bar->generation == generation);
+       do_wait ((int *) &bar->generation, state);
+      while (__atomic_load_4 (&bar->generation, MEMMODEL_ACQUIRE) == state);
     }
 }
 
@@ -81,15 +79,15 @@ gomp_team_barrier_wake (gomp_barrier_t *
 void
 gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
 {
-  unsigned int generation;
+  unsigned int generation, gen;
 
   if (__builtin_expect ((state & 1) != 0, 0))
     {
       /* Next time we'll be awaiting TOTAL threads again.  */
       struct gomp_thread *thr = gomp_thread ();
       struct gomp_team *team = thr->ts.team;
+
       bar->awaited = bar->total;
-      atomic_write_barrier ();
       if (__builtin_expect (team->task_count, 0))
        {
          gomp_barrier_handle_tasks (state);
@@ -97,7 +95,7 @@ gomp_team_barrier_wait_end (gomp_barrier
        }
       else
        {
-         bar->generation = state + 3;
+         __atomic_store_4 (&bar->generation, state + 3, MEMMODEL_RELEASE);
          futex_wake ((int *) &bar->generation, INT_MAX);
          return;
        }
@@ -107,12 +105,16 @@ gomp_team_barrier_wait_end (gomp_barrier
   do
     {
       do_wait ((int *) &bar->generation, generation);
-      if (__builtin_expect (bar->generation & 1, 0))
-       gomp_barrier_handle_tasks (state);
-      if ((bar->generation & 2))
+      gen = __atomic_load_4 (&bar->generation, MEMMODEL_ACQUIRE);
+      if (__builtin_expect (gen & 1, 0))
+       {
+         gomp_barrier_handle_tasks (state);
+         gen = __atomic_load_4 (&bar->generation, MEMMODEL_ACQUIRE);
+       }
+      if ((gen & 2) != 0)
        generation |= 2;
     }
-  while (bar->generation != state + 4);
+  while (gen != state + 4);
 }
 
 void

-- 
Alan Modra
Australia Development Lab, IBM

Reply via email to