> -----Original Message-----
> From: Tobias Burnus <[email protected]>
> Sent: 13 January 2026 16:54
> To: Matthew Malcomson <[email protected]>; gcc-[email protected]; Jakub Jelinek <[email protected]>
> Cc: Julian Brown <[email protected]>; Thomas Schwinge <[email protected]>; Andrew Stubbs <[email protected]>; Tom de Vries <[email protected]>; Sebastian Huber <[email protected]>
> Subject: Re: [PATCH 1/5] libgomp: Enforce tasks executed lexically after scheduled
>
> On November 26, 2025 [email protected] wrote:
> > In PR122314 we noticed that our implementation of a barrier could
> > execute tasks from the next "Task scheduling" region. This was
> > because of a race condition where a barrier could be "completed", and
> > some thread raced ahead to schedule another task on the "next"
> > barrier all before some other thread checks for a bit on the
> > generation number to tell if there is a task pending.
> >
> > The solution provided here is to check whether the generation number
> > has "incremented" past the state that this barrier was entered with.
> > As it happens the `state` variable already provided to
> > `gomp_barrier_handle_tasks` is enough for the targets to tell whether
> > the current global generation has incremented from the existing one.
> >
> > This requires some changes in the two loops in bar.c that are waiting
> > on tasks being available. These loops now need to check for
> > "generation has incremented" rather than "generation is identical to
> > one increment forward". Without such an adjustment of the check a
> > thread that is refusing to execute tasks because they have been
> > scheduled for the next barrier will not continue into the next region
> > until some other thread has completed the task (and removed the
> > BAR_TASK_PENDING flag).
> >
> > This problem could be seen by a hang in testcases like
> > task-reduction-13.c.
>
> This seems to be https://gcc.gnu.org/PR88707 - can you add this PR to
> the ChangeLog?
>
> [The PR shows this issue with --disable-linux-futex (i.e. on non-Linux),
> on non-Linux, and on 32bit Linux/Sparc. The dump implies it is the
> issue fixed by this patch.]
>
> * * *
>
> > PR libgomp/122314
> > * config/gcn/bar.c (gomp_team_barrier_wait_end): Use
> > gomp_barrier_state_is_incremented.
> > (gomp_team_barrier_wait_cancel_end): Likewise.
> > * config/gcn/bar.h (gomp_barrier_state_is_incremented,
> > gomp_barrier_has_completed): New.
> > * config/linux/bar.c (gomp_team_barrier_wait_end): Use
> > gomp_barrier_state_is_incremented.
> > (gomp_team_barrier_wait_cancel_end): Likewise.
> > * config/linux/bar.h (gomp_barrier_state_is_incremented,
> > gomp_barrier_has_completed): New.
> > * config/nvptx/bar.h (gomp_barrier_state_is_incremented,
> > gomp_barrier_has_completed): New.
> > * config/posix/bar.c (gomp_team_barrier_wait_end): Use
> > gomp_barrier_state_is_incremented.
> > (gomp_team_barrier_wait_cancel_end): Likewise.
> > * config/posix/bar.h (gomp_barrier_state_is_incremented,
> > gomp_barrier_has_completed): New.
> > * config/rtems/bar.h (gomp_barrier_state_is_incremented,
> > gomp_barrier_has_completed): New.
> > * task.c (gomp_barrier_handle_tasks): Use
> > gomp_barrier_has_completed.
> > * testsuite/libgomp.c/pr122314.c: New test.
> * * *
> > diff --git a/libgomp/config/gcn/bar.c b/libgomp/config/gcn/bar.c
> > index 57ac648477e..05daa8fcbbc 100644
> > --- a/libgomp/config/gcn/bar.c
> > +++ b/libgomp/config/gcn/bar.c
> > @@ -128,7 +128,7 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
> >           gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
> >         }
> >      }
> > -  while (gen != state + BAR_INCR);
> > +  while (!gomp_barrier_state_is_incremented (gen, state));
> >  }
> >
> >  void
> > @@ -207,7 +207,7 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
> >           gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
> >         }
> >      }
> > -  while (gen != state + BAR_INCR);
> > +  while (!gomp_barrier_state_is_incremented (gen, state));
> >
> >    return false;
> >  }
> > diff --git a/libgomp/config/gcn/bar.h b/libgomp/config/gcn/bar.h
> > index b62d3af6dee..8fdd6465822 100644
> > --- a/libgomp/config/gcn/bar.h
> > +++ b/libgomp/config/gcn/bar.h
> > @@ -165,4 +165,20 @@ gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
> >    bar->generation = (state & -BAR_INCR) + BAR_INCR;
> >  }
> >
> > +static inline bool
> > +gomp_barrier_state_is_incremented (gomp_barrier_state_t gen,
> > +                                   gomp_barrier_state_t state)
> > +{
> > +  unsigned next_state = (state & -BAR_INCR) + BAR_INCR;
> > +  return next_state > state ? gen >= next_state : gen < state;
> > +}
> > +
> > +static inline bool
> > +gomp_barrier_has_completed (gomp_barrier_state_t state, gomp_barrier_t *bar)
> > +{
> > +  /* Handling overflow in the generation. The "next" state could be less than
> > +     or greater than the current one. */
> > +  return gomp_barrier_state_is_incremented (bar->generation, state);
> > +}
> > +
> >  #endif /* GOMP_BARRIER_H */
>
> * * *
>
> > --- a/libgomp/task.c
> > +++ b/libgomp/task.c
> > @@ -1559,6 +1559,23 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state)
> >    int do_wake = 0;
> >
> >    gomp_mutex_lock (&team->task_lock);
> > +  /* Avoid running tasks from next task scheduling region (PR122314).
> > +     N.b. we check that `team->task_count != 0` in order to avoid the
> > +     non-atomic read of `bar->generation` "conflicting" (in the C standard
> > +     sense) with the atomic write of `bar->generation` in
> > +     `gomp_team_barrier_wait_end`. That conflict would otherwise be a
> > +     data-race and hence UB. One alternate approach could have been to
> > +     atomically load `bar->generation` in `gomp_barrier_has_completed`.
> > +
> > +     When `task_count == 0` we're not going to perform tasks anyway, so the
> > +     problem of PR122314 is naturally avoided. */
> > +  if (team->task_count != 0
> > +      && gomp_barrier_has_completed (state, &team->barrier))
> > +    {
> > +      gomp_mutex_unlock (&team->task_lock);
> > +      return;
> > +    }
> > +
> >    if (gomp_barrier_last_thread (state))
> >      {
> >        if (team->task_count == 0)
> * * *
> > --- /dev/null
> > +++ b/libgomp/testsuite/libgomp.c/pr122314.c
> > @@ -0,0 +1,36 @@
> > +#include <omp.h>
> > +
> > +void abort ();
> > +
> > +#define NUM_THREADS 8
> > +unsigned full_data[NUM_THREADS] = {0};
> > +void
> > +test ()
> > +{
> > +#pragma omp parallel num_threads(8)
> > +  {
> > +#pragma omp barrier
> > +    /* Initialise so that if tasks are performed on the previous barrier their
> > +       updates get overridden. This is a key behaviour of this test. */
> > +    full_data[omp_get_thread_num ()] = 0;
> > +#pragma omp for
> > +    for (int i = 0; i < 10; i++)
> > +#pragma omp task
> > +      {
> > +        full_data[omp_get_thread_num ()] += 1;
> > +      }
> > +  }
> > +
> > +  unsigned total = 0;
> > +  for (int i = 0; i < NUM_THREADS; i++)
> > +    total += full_data[i];
> > +
> > +  if (total != 10)
> > +    abort ();
> > +}
> > +
> > +int
> > +main ()
> > +{
> > +  test ();
> > +}
>
> Can you add after 'test();' a
> #pragma omp target
> test ();
>
> (This additionally needs a '#pragma omp declare target
> enter(full_data)'.)
>
> That way, nvptx and GCN are also tested. (However, even without the
> patch the offloading part does not seem to fail for me on my laptop
> (Nvidia Ampere); the host code (x86-64) does fail from time to time
> without the patch.)
>
> Otherwise: LGTM. Thanks for the patch and sorry for this delay.
Hi Tobias,

Thanks for the suggestions. In the attached patch I have added the target
pragmas to the test and PR libgomp/88707 to the ChangeLog.

Does it look OK to commit?

Thanks,
Prathamesh
>
> Tobias
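For reference, the offloading additions follow the usual declare-target
pattern. A minimal, self-contained sketch of that pattern (hypothetical
names, not the attached testcase) looks like this:

/* Hypothetical example of the pattern used in the attached testcase: the
   global and the function get device copies via "declare target enter",
   and the function is then run both on the host and inside a target
   region. The check stays inside the function because the device works
   on its own copy of the global. */
unsigned counter;
#pragma omp declare target enter(counter)

void
run_check (void)
{
  counter = 0;
  counter += 1;
  if (counter != 1)
    __builtin_abort ();
}
#pragma omp declare target enter(run_check)

int
main (void)
{
  run_check ();   /* Runs on the host. */
#pragma omp target
  run_check ();   /* Runs on the device, or falls back to the host. */
  return 0;
}
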
libgomp: Enforce tasks executed lexically after scheduled.
In PR122314 we noticed that our implementation of a barrier could
execute tasks from the next "Task scheduling" region. This was because
of a race condition where a barrier could be "completed", and some
thread raced ahead to schedule another task on the "next" barrier all
before some other thread checks for a bit on the generation number to
tell if there is a task pending.
The solution provided here is to check whether the generation number has
"incremented" past the state that this barrier was entered with. As it
happens the `state` variable already provided to
`gomp_barrier_handle_tasks` is enough for the targets to tell whether
the current global generation has incremented from the existing one.
This requires some changes in the two loops in bar.c that are waiting on
tasks being available. These loops now need to check for "generation
has incremented" rather than "generation is identical to one increment
forward". Without such an adjustment of the check a thread that is
refusing to execute tasks because they have been scheduled for the next
barrier will not continue into the next region until some other thread
has completed the task (and removed the BAR_TASK_PENDING flag).
This problem could be seen by a hang in testcases like
task-reduction-13.c.
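As a standalone illustration (not part of the patch) of how the new
comparison copes with the generation counter wrapping around, the
following sketch reproduces the helper's logic with an assumed BAR_INCR
of 8 and checks it against a few concrete values:

/* Standalone sketch; the BAR_INCR value and the typedef here are
   assumptions made purely for illustration. */
#include <assert.h>

#define BAR_INCR 8
typedef unsigned int gomp_barrier_state_t;

static int
state_is_incremented (gomp_barrier_state_t gen, gomp_barrier_state_t state)
{
  /* Same comparison as the new gomp_barrier_state_is_incremented helper. */
  unsigned next_state = (state & -BAR_INCR) + BAR_INCR;
  return next_state > state ? gen >= next_state : gen < state;
}

int
main ()
{
  /* Ordinary case: any later generation counts as "incremented". */
  assert (state_is_incremented (16, 8));
  assert (state_is_incremented (24, 8));
  assert (!state_is_incremented (8, 8));
  /* Wraparound case: entering the barrier in the last generation before
     the counter overflows; next_state wraps to 0, so "gen < state" is the
     correct test. */
  gomp_barrier_state_t last = -BAR_INCR;
  assert (state_is_incremented (0, last));
  assert (!state_is_incremented (last, last));
  return 0;
}

The second branch of the comparison is what lets gomp_barrier_has_completed
keep working when bar->generation overflows past zero, which is the
situation the comment in the new bar.h helper refers to.
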
Testing done:
- Bootstrap & regtest on aarch64 and x86_64.
- With & without _LIBGOMP_CHECKING_.
- Testsuite with & without OMP_WAIT_POLICY=passive.
- Cross compilation & regtest on arm.
- TSAN done on this as part of all my upstream patches.
libgomp/ChangeLog:
PR libgomp/122314
PR libgomp/88707
* config/gcn/bar.c (gomp_team_barrier_wait_end): Use
gomp_barrier_state_is_incremented.
(gomp_team_barrier_wait_cancel_end): Likewise.
* config/gcn/bar.h (gomp_barrier_state_is_incremented,
gomp_barrier_has_completed): New.
* config/linux/bar.c (gomp_team_barrier_wait_end): Use
gomp_barrier_state_is_incremented.
(gomp_team_barrier_wait_cancel_end): Likewise.
* config/linux/bar.h (gomp_barrier_state_is_incremented,
gomp_barrier_has_completed): New.
* config/nvptx/bar.h (gomp_barrier_state_is_incremented,
gomp_barrier_has_completed): New.
* config/posix/bar.c (gomp_team_barrier_wait_end): Use
gomp_barrier_state_is_incremented.
(gomp_team_barrier_wait_cancel_end): Likewise.
* config/posix/bar.h (gomp_barrier_state_is_incremented,
gomp_barrier_has_completed): New.
* config/rtems/bar.h (gomp_barrier_state_is_incremented,
gomp_barrier_has_completed): New.
* task.c (gomp_barrier_handle_tasks): Use
gomp_barrier_has_completed.
* testsuite/libgomp.c/pr122314.c: New test.
Signed-off-by: Matthew Malcomson <[email protected]>
diff --git a/libgomp/config/gcn/bar.c b/libgomp/config/gcn/bar.c
index a655015f612..10c3f5d1362 100644
--- a/libgomp/config/gcn/bar.c
+++ b/libgomp/config/gcn/bar.c
@@ -128,7 +128,7 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
}
}
- while (gen != state + BAR_INCR);
+ while (!gomp_barrier_state_is_incremented (gen, state));
}
void
@@ -207,7 +207,7 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
}
}
- while (gen != state + BAR_INCR);
+ while (!gomp_barrier_state_is_incremented (gen, state));
return false;
}
diff --git a/libgomp/config/gcn/bar.h b/libgomp/config/gcn/bar.h
index 4df46960470..0507efb7d2d 100644
--- a/libgomp/config/gcn/bar.h
+++ b/libgomp/config/gcn/bar.h
@@ -165,4 +165,20 @@ gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
bar->generation = (state & -BAR_INCR) + BAR_INCR;
}
+static inline bool
+gomp_barrier_state_is_incremented (gomp_barrier_state_t gen,
+ gomp_barrier_state_t state)
+{
+ unsigned next_state = (state & -BAR_INCR) + BAR_INCR;
+ return next_state > state ? gen >= next_state : gen < state;
+}
+
+static inline bool
+gomp_barrier_has_completed (gomp_barrier_state_t state, gomp_barrier_t *bar)
+{
+ /* Handling overflow in the generation. The "next" state could be less than
+ or greater than the current one. */
+ return gomp_barrier_state_is_incremented (bar->generation, state);
+}
+
#endif /* GOMP_BARRIER_H */
diff --git a/libgomp/config/linux/bar.c b/libgomp/config/linux/bar.c
index e850cebb51f..2a1b052b11e 100644
--- a/libgomp/config/linux/bar.c
+++ b/libgomp/config/linux/bar.c
@@ -118,7 +118,7 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
}
generation |= gen & BAR_WAITING_FOR_TASK;
}
- while (gen != state + BAR_INCR);
+ while (!gomp_barrier_state_is_incremented (gen, state));
}
void
@@ -185,7 +185,7 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
}
generation |= gen & BAR_WAITING_FOR_TASK;
}
- while (gen != state + BAR_INCR);
+ while (!gomp_barrier_state_is_incremented (gen, state));
return false;
}
diff --git a/libgomp/config/linux/bar.h b/libgomp/config/linux/bar.h
index 3ad3111f3dd..b1fff01105a 100644
--- a/libgomp/config/linux/bar.h
+++ b/libgomp/config/linux/bar.h
@@ -165,4 +165,20 @@ gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
bar->generation = (state & -BAR_INCR) + BAR_INCR;
}
+static inline bool
+gomp_barrier_state_is_incremented (gomp_barrier_state_t gen,
+ gomp_barrier_state_t state)
+{
+ unsigned next_state = (state & -BAR_INCR) + BAR_INCR;
+ return next_state > state ? gen >= next_state : gen < state;
+}
+
+static inline bool
+gomp_barrier_has_completed (gomp_barrier_state_t state, gomp_barrier_t *bar)
+{
+ /* Handling overflow in the generation. The "next" state could be less than
+ or greater than the current one. */
+ return gomp_barrier_state_is_incremented (bar->generation, state);
+}
+
#endif /* GOMP_BARRIER_H */
diff --git a/libgomp/config/nvptx/bar.h b/libgomp/config/nvptx/bar.h
index 2ec1eb0f39b..aa2592ba5b3 100644
--- a/libgomp/config/nvptx/bar.h
+++ b/libgomp/config/nvptx/bar.h
@@ -169,4 +169,20 @@ gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
bar->generation = (state & -BAR_INCR) + BAR_INCR;
}
+static inline bool
+gomp_barrier_state_is_incremented (gomp_barrier_state_t gen,
+ gomp_barrier_state_t state)
+{
+ unsigned next_state = (state & -BAR_INCR) + BAR_INCR;
+ return next_state > state ? gen >= next_state : gen < state;
+}
+
+static inline bool
+gomp_barrier_has_completed (gomp_barrier_state_t state, gomp_barrier_t *bar)
+{
+ /* Handling overflow in the generation. The "next" state could be less than
+ or greater than the current one. */
+ return gomp_barrier_state_is_incremented (bar->generation, state);
+}
+
#endif /* GOMP_BARRIER_H */
diff --git a/libgomp/config/posix/bar.c b/libgomp/config/posix/bar.c
index 31451cd8bfa..ce69905ba67 100644
--- a/libgomp/config/posix/bar.c
+++ b/libgomp/config/posix/bar.c
@@ -156,7 +156,7 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
}
}
- while (gen != state + BAR_INCR);
+ while (!gomp_barrier_state_is_incremented (gen, state));
#ifdef HAVE_SYNC_BUILTINS
n = __sync_add_and_fetch (&bar->arrived, -1);
@@ -228,7 +228,7 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
break;
}
}
- while (gen != state + BAR_INCR);
+ while (!gomp_barrier_state_is_incremented (gen, state));
#ifdef HAVE_SYNC_BUILTINS
n = __sync_add_and_fetch (&bar->arrived, -1);
diff --git a/libgomp/config/posix/bar.h b/libgomp/config/posix/bar.h
index 33d25592daa..5a175c228c2 100644
--- a/libgomp/config/posix/bar.h
+++ b/libgomp/config/posix/bar.h
@@ -155,4 +155,20 @@ gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
bar->generation = (state & -BAR_INCR) + BAR_INCR;
}
+static inline bool
+gomp_barrier_state_is_incremented (gomp_barrier_state_t gen,
+ gomp_barrier_state_t state)
+{
+ unsigned next_state = (state & -BAR_INCR) + BAR_INCR;
+ return next_state > state ? gen >= next_state : gen < state;
+}
+
+static inline bool
+gomp_barrier_has_completed (gomp_barrier_state_t state, gomp_barrier_t *bar)
+{
+ /* Handling overflow in the generation. The "next" state could be less than
+ or greater than the current one. */
+ return gomp_barrier_state_is_incremented (bar->generation, state);
+}
+
#endif /* GOMP_BARRIER_H */
diff --git a/libgomp/config/rtems/bar.h b/libgomp/config/rtems/bar.h
index 27326db9c77..61fa91f300f 100644
--- a/libgomp/config/rtems/bar.h
+++ b/libgomp/config/rtems/bar.h
@@ -167,4 +167,20 @@ gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
bar->generation = (state & -BAR_INCR) + BAR_INCR;
}
+static inline bool
+gomp_barrier_state_is_incremented (gomp_barrier_state_t gen,
+ gomp_barrier_state_t state)
+{
+ unsigned next_state = (state & -BAR_INCR) + BAR_INCR;
+ return next_state > state ? gen >= next_state : gen < state;
+}
+
+static inline bool
+gomp_barrier_has_completed (gomp_barrier_state_t state, gomp_barrier_t *bar)
+{
+ /* Handling overflow in the generation. The "next" state could be less than
+ or greater than the current one. */
+ return gomp_barrier_state_is_incremented (bar->generation, state);
+}
+
#endif /* GOMP_BARRIER_H */
diff --git a/libgomp/task.c b/libgomp/task.c
index a6f21b05687..554636aadd5 100644
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -1559,6 +1559,23 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state)
int do_wake = 0;
gomp_mutex_lock (&team->task_lock);
+ /* Avoid running tasks from next task scheduling region (PR122314).
+ N.b. we check that `team->task_count != 0` in order to avoid the
+ non-atomic read of `bar->generation` "conflicting" (in the C standard
+ sense) with the atomic write of `bar->generation` in
+ `gomp_team_barrier_wait_end`. That conflict would otherwise be a
+ data-race and hence UB. One alternate approach could have been to
+ atomically load `bar->generation` in `gomp_barrier_has_completed`.
+
+ When `task_count == 0` we're not going to perform tasks anyway, so the
+ problem of PR122314 is naturally avoided. */
+ if (team->task_count != 0
+ && gomp_barrier_has_completed (state, &team->barrier))
+ {
+ gomp_mutex_unlock (&team->task_lock);
+ return;
+ }
+
if (gomp_barrier_last_thread (state))
{
if (team->task_count == 0)
diff --git a/libgomp/testsuite/libgomp.c/pr122314.c b/libgomp/testsuite/libgomp.c/pr122314.c
new file mode 100644
index 00000000000..bb9565de726
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr122314.c
@@ -0,0 +1,42 @@
+#include <omp.h>
+
+void abort ();
+
+#define NUM_THREADS 8
+unsigned full_data[NUM_THREADS] = {0};
+#pragma omp declare target enter(full_data)
+
+void
+test ()
+{
+#pragma omp parallel num_threads(8)
+ {
+#pragma omp barrier
+ /* Initialise so that if tasks are performed on the previous barrier their
+ updates get overridden. This is a key behaviour of this test. */
+ full_data[omp_get_thread_num ()] = 0;
+#pragma omp for
+ for (int i = 0; i < 10; i++)
+#pragma omp task
+ {
+ full_data[omp_get_thread_num ()] += 1;
+ }
+ }
+
+ unsigned total = 0;
+ for (int i = 0; i < NUM_THREADS; i++)
+ total += full_data[i];
+
+ if (total != 10)
+ abort ();
+}
+#pragma omp declare target enter(test)
+
+int
+main ()
+{
+ test ();
+
+#pragma omp target
+ test ();
+}