On Tue, Oct 20, 2015 at 09:34:32PM +0300, Alexander Monakov wrote: > diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c > deleted file mode 100644 > index e69de29..0000000 > diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h > index 1454adf..f25b265 100644 > --- a/libgomp/libgomp.h > +++ b/libgomp/libgomp.h > @@ -483,7 +483,15 @@ enum gomp_cancel_kind > > /* ... and here is that TLS data. */ > > -#if defined HAVE_TLS || defined USE_EMUTLS > +#if defined __nvptx__ > +extern struct gomp_thread *nvptx_thrs;
What kind of address space is this variable? It should be a per-CTA var, so that different teams have different, and simultaneous target regions have different too. > +static inline struct gomp_thread *gomp_thread (void) > +{ > + int tid; > + asm ("mov.u32 %0, %%tid.y;" : "=r" (tid)); > + return nvptx_thrs + tid; > +} > +#elif defined HAVE_TLS || defined USE_EMUTLS Other than that, yes, such bit of nvptx specific stuff is acceptable here. > extern __thread struct gomp_thread gomp_tls_data; > static inline struct gomp_thread *gomp_thread (void) > { > diff --git a/libgomp/team.c b/libgomp/team.c > index 7671b05..5b74532 100644 > --- a/libgomp/team.c > +++ b/libgomp/team.c > @@ -30,6 +30,7 @@ > #include <stdlib.h> > #include <string.h> > > +#ifdef LIBGOMP_USE_PTHREADS > /* This attribute contains PTHREAD_CREATE_DETACHED. */ > pthread_attr_t gomp_thread_attr; > > @@ -43,6 +44,7 @@ __thread struct gomp_thread gomp_tls_data; > #else > pthread_key_t gomp_tls_key; > #endif > +#endif I'm surprised that for team.c you chose to adjust the shared source, rather than copy and remove all the cruft you don't need/want. That includes the LIBGOMP_USE_PTHREADS guarded parts, all the thread binding stuff etc. I'd like to see at least for comparison how much actually remained in there. > @@ -58,6 +60,52 @@ struct gomp_thread_start_data > bool nested; > }; > > +#ifdef __nvptx__ > +struct gomp_thread *nvptx_thrs; > + > +static struct gomp_thread_pool *gomp_new_thread_pool (void); > +static void *gomp_thread_start (void *); > + > +void __attribute__((kernel)) > +gomp_nvptx_main (void (*fn) (void *), void *fn_data) > +{ > + int ntids, tid, laneid; > + asm ("mov.u32 %0, %%laneid;" : "=r" (laneid)); > + if (laneid) > + return; > + static struct gomp_thread_pool *pool; > + asm ("mov.u32 %0, %%tid.y;" : "=r" (tid)); > + asm ("mov.u32 %0, %%ntid.y;" : "=r"(ntids)); > + if (tid == 0) > + { > + gomp_global_icv.nthreads_var = ntids; > + > + nvptx_thrs = gomp_malloc_cleared (ntids * sizeof (*nvptx_thrs)); > + > + pool = gomp_new_thread_pool (); > + pool->threads = gomp_malloc (ntids * sizeof (*pool->threads)); > + pool->threads[0] = nvptx_thrs; > + pool->threads_size = ntids; > + pool->threads_used = ntids; > + gomp_barrier_init (&pool->threads_dock, ntids); > + > + nvptx_thrs[0].thread_pool = pool; > + asm ("bar.sync 0;"); > + fn (fn_data); > + > + gomp_free_thread (nvptx_thrs); > + free (nvptx_thrs); > + } > + else > + { > + struct gomp_thread_start_data tsdata = {0}; > + tsdata.ts.team_id = tid; > + asm ("bar.sync 0;"); > + tsdata.thread_pool = pool; > + gomp_thread_start (&tsdata); > + } > +} > +#endif If nvptx is going to use the toplevel team.c, then at least this should not go in there, it is sufficiently large, and you can just stick it into config/nvptx/team.c and include the toplevel team.c from there. > @@ -88,7 +138,8 @@ gomp_thread_start (void *xdata) > thr->task = data->task; > thr->place = data->place; > > - thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release; > + if (thr->ts.team) > + thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release; Why this? Lots of gomp_thread_start other places assume thr->ts.team is non-NULL. And, this isn't even guarded with __nvptx__, we don't want to slow down host thread start. Jakub