So can I understand like this: TLS (Thread local storage) is a global section map to each thread's space. Each thread keep one copy of this section's copy. And thread_specific is in heap, using sync function to manage the resource for each thread. ?
On Fri, 2013-11-08 at 02:58 +0000, Zou, Nanhai wrote: > TLS (Thread local storage) is useful for convert legacy thread unsafe program > into thread-safe. > E.g. errno in glibc. > > But for this case, I think explicitly separate the thread specific data is > better. > Not only for thread safe, but also for later optimization. > This help us to collect all data that will be modified during NDRange. > > Thanks > Zou Nanhai > > -----Original Message----- > From: beignet-boun...@lists.freedesktop.org > [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Song, Ruiling > Sent: Friday, November 08, 2013 10:38 AM > To: Zhigang Gong; junyan...@inbox.com > Cc: Junyan He; beignet@lists.freedesktop.org > Subject: Re: [Beignet] [PATCH] Move the gpgpu struct from cl_command_queue to > thread specific context > > I am really new to the keyword "__thread", and have a quick look at docs on > the web: > http://gcc.gnu.org/onlinedocs/gcc-3.3.1/gcc/Thread-Local.html#Thread-Local > it says: "The __thread specifier may be applied to any global, file-scoped > static, function-scoped static, or static data member of a class. It may not > be applied to block-scoped automatic or non-static data member. " > From my understanding, this is not proper for our case. > > Thanks! > Ruiling > -----Original Message----- > From: beignet-boun...@lists.freedesktop.org > [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Zhigang Gong > Sent: Friday, November 08, 2013 8:29 AM > To: junyan...@inbox.com > Cc: Junyan He; beignet@lists.freedesktop.org > Subject: Re: [Beignet] [PATCH] Move the gpgpu struct from cl_command_queue to > thread specific context > > I agree with you that use thread data is better than locking. > One comment, how about to use thread local storage to simplify this patch as > below: > > struct _cl_commonand_queue { > ... > __thread cl_gpgpu gpgpu; > ... > }; > > Then in the initialization stage, set it to NULL; > > queue->gpgpu = NULL; > > In the head of each functions which use queue->gpgpu, add the following > code: > > if (queue->gpgpu == NULL) > TRY_ALLOC_NO_ERR (queue->gpgpu, cl_gpgpu_new(ctx->drv)); > > Then we don't need to change any other code? > > What's your opinion? > > On Fri, Nov 08, 2013 at 12:58:00AM +0800, junyan...@inbox.com wrote: > > From: Junyan He <junyan...@linux.intel.com> > > > > We find some cases will use multi-threads to run on the same queue, > > executing the same kernel. This will cause the gpgpu struct which is > > very important for GPU context setting be destroyed because we do not > > implement any sync protect on it now. > > Move the gpgpu struct into thread specific space will fix this problem > > because the lib_drm will do the GPU command serialization for us. > > --- > > src/CMakeLists.txt | 1 + > > src/cl_command_queue.c | 27 +++++++++++----- > > src/cl_command_queue.h | 9 +++++- > > src/cl_command_queue_gen7.c | 7 +++-- > > src/cl_event.c | 6 ++-- > > src/cl_thread.c | 72 > > +++++++++++++++++++++++++++++++++++++++++++ > > src/cl_thread.h | 34 ++++++++++++++++++++ > > utests/CMakeLists.txt | 2 +- > > 8 files changed, 144 insertions(+), 14 deletions(-) create mode > > 100644 src/cl_thread.c create mode 100644 src/cl_thread.h > > > > diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index > > 1e28c6c..59d330e 100644 > > --- a/src/CMakeLists.txt > > +++ b/src/CMakeLists.txt > > @@ -39,6 +39,7 @@ set(OPENCL_SRC > > cl_command_queue.c > > cl_command_queue.h > > cl_command_queue_gen7.c > > + cl_thread.c > > cl_driver.h > > cl_driver.cpp > > cl_driver_defs.c > > diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index > > 3f9d95c..3530976 100644 > > --- a/src/cl_command_queue.c > > +++ b/src/cl_command_queue.c > > @@ -24,6 +24,7 @@ > > #include "cl_device_id.h" > > #include "cl_mem.h" > > #include "cl_utils.h" > > +#include "cl_thread.h" > > #include "cl_alloc.h" > > #include "cl_driver.h" > > #include "cl_khr_icd.h" > > @@ -43,7 +44,9 @@ cl_command_queue_new(cl_context ctx) > > queue->magic = CL_MAGIC_QUEUE_HEADER; > > queue->ref_n = 1; > > queue->ctx = ctx; > > - TRY_ALLOC_NO_ERR (queue->gpgpu, cl_gpgpu_new(ctx->drv)); > > + if ((queue->thread_data = cl_thread_data_create()) == NULL) { > > + goto error; > > + } > > > > /* Append the command queue in the list */ > > pthread_mutex_lock(&ctx->queue_lock); > > @@ -84,9 +87,11 @@ cl_command_queue_delete(cl_command_queue queue) > > cl_mem_delete(queue->fulsim_out); > > queue->fulsim_out = NULL; > > } > > + > > + cl_thread_data_destroy(queue->thread_data); > > + queue->thread_data = NULL; > > cl_mem_delete(queue->perf); > > cl_context_delete(queue->ctx); > > - cl_gpgpu_delete(queue->gpgpu); > > cl_free(queue->wait_events); > > queue->magic = CL_MAGIC_DEAD_HEADER; /* For safety */ > > cl_free(queue); > > @@ -119,13 +124,15 @@ LOCAL cl_int > > cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k) { > > uint32_t i; > > + GET_QUEUE_THREAD_GPGPU(queue); > > + > > for (i = 0; i < k->image_sz; i++) { > > int id = k->images[i].arg_idx; > > struct _cl_mem_image *image; > > assert(gbe_kernel_get_arg_type(k->opaque, id) == GBE_ARG_IMAGE); > > image = cl_mem_image(k->args[id].mem); > > set_image_info(k->curbe, &k->images[i], image); > > - cl_gpgpu_bind_image(queue->gpgpu, k->images[i].idx, image->base.bo, > > image->offset, > > + cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, > > + image->offset, > > image->intel_fmt, image->image_type, > > image->w, image->h, image->depth, > > image->row_pitch, image->tiling); @@ -136,6 > > +143,8 @@ cl_command_queue_bind_image(cl_command_queue queue, > > cl_kernel k) LOCAL cl_int > > cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) { > > + GET_QUEUE_THREAD_GPGPU(queue); > > + > > /* Bind all user buffers (given by clSetKernelArg) */ > > uint32_t i; > > enum gbe_arg_type arg_type; /* kind of argument */ @@ -147,9 +156,9 > > @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) > > offset = gbe_kernel_get_curbe_offset(k->opaque, > > GBE_CURBE_KERNEL_ARGUMENT, i); > > if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) { > > struct _cl_mem_buffer* buffer = (struct > > _cl_mem_buffer*)k->args[i].mem; > > - cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, > > buffer->sub_offset, cc_llc_l3); > > + cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, > > + buffer->sub_offset, cc_llc_l3); > > } else { > > - cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, 0, > > cc_llc_l3); > > + cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, 0, > > + cc_llc_l3); > > } > > } > > > > @@ -407,14 +416,18 @@ error: > > LOCAL cl_int > > cl_command_queue_flush(cl_command_queue queue) { > > - cl_gpgpu_flush(queue->gpgpu); > > + GET_QUEUE_THREAD_GPGPU(queue); > > + > > + cl_gpgpu_flush(gpgpu); > > return CL_SUCCESS; > > } > > > > LOCAL cl_int > > cl_command_queue_finish(cl_command_queue queue) { > > - cl_gpgpu_sync(queue->gpgpu); > > + GET_QUEUE_THREAD_GPGPU(queue); > > + > > + cl_gpgpu_sync(gpgpu); > > return CL_SUCCESS; > > } > > > > diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h index > > 9396fd7..40c272c 100644 > > --- a/src/cl_command_queue.h > > +++ b/src/cl_command_queue.h > > @@ -22,6 +22,7 @@ > > > > #include "cl_internals.h" > > #include "cl_driver.h" > > +#include "cl_thread.h" > > #include "CL/cl.h" > > #include <stdint.h> > > > > @@ -40,11 +41,17 @@ struct _cl_command_queue { > > cl_event last_event; /* The last event in the queue, for > > enqueue mark used */ > > cl_command_queue_properties props; /* Queue properties */ > > cl_command_queue prev, next; /* We chain the command queues > > together */ > > - cl_gpgpu gpgpu; /* Setup all GEN commands */ > > + void *thread_data; /* Used to store thread context > > data */ > > cl_mem perf; /* Where to put the perf counters */ > > cl_mem fulsim_out; /* Fulsim will output this buffer */ > > }; > > > > +/* The macro to get the thread specified gpgpu struct. */ #define > > +GET_QUEUE_THREAD_GPGPU(queue) \ > > + cl_gpgpu gpgpu = queue ? cl_get_thread_gpgpu(queue) : NULL; \ > > + if (queue) \ > > + assert(gpgpu); > > + > > /* Allocate and initialize a new command queue. Also insert it in the list > > of > > * command queue in the associated context > > */ > > diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c > > index 65f8e17..fb7a8a3 100644 > > --- a/src/cl_command_queue_gen7.c > > +++ b/src/cl_command_queue_gen7.c > > @@ -99,6 +99,7 @@ static void > > cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker) { > > /* calculate constant buffer size */ > > + GET_QUEUE_THREAD_GPGPU(queue); > > int32_t arg; > > size_t offset; > > gbe_program prog = ker->program->opaque; @@ -115,7 +116,7 @@ > > cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker) > > if(global_const_size == 0 && constant_buf_size == 0) > > return; > > > > - cl_buffer bo = cl_gpgpu_alloc_constant_buffer(queue->gpgpu, > > constant_buf_size + global_const_size + 4); > > + cl_buffer bo = cl_gpgpu_alloc_constant_buffer(gpgpu, > > + constant_buf_size + global_const_size + 4); > > cl_buffer_map(bo, 1); > > char * cst_addr = cl_buffer_get_virtual(bo); > > offset = 0; > > @@ -256,8 +257,8 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, > > const size_t *global_wk_sz, > > const size_t *local_wk_sz) { > > + GET_QUEUE_THREAD_GPGPU(queue); > > cl_context ctx = queue->ctx; > > - cl_gpgpu gpgpu = queue->gpgpu; > > char *final_curbe = NULL; /* Includes them and one sub-buffer per group > > */ > > cl_gpgpu_kernel kernel; > > const uint32_t simd_sz = cl_kernel_get_simd_width(ker); @@ -297,7 > > +298,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, > > /* Bind user images */ > > cl_command_queue_bind_image(queue, ker); > > /* Bind all samplers */ > > - cl_gpgpu_bind_sampler(queue->gpgpu, ker->samplers, > > ker->sampler_sz); > > + cl_gpgpu_bind_sampler(gpgpu, ker->samplers, ker->sampler_sz); > > > > cl_setup_scratch(gpgpu, ker); > > /* Bind a stack if needed */ > > diff --git a/src/cl_event.c b/src/cl_event.c index 1dc02ae..028dfb6 > > 100644 > > --- a/src/cl_event.c > > +++ b/src/cl_event.c > > @@ -48,6 +48,7 @@ cl_event_is_gpu_command_type(cl_command_type type) > > cl_event cl_event_new(cl_context ctx, cl_command_queue queue, > > cl_command_type type, cl_bool emplict) { > > cl_event event = NULL; > > + GET_QUEUE_THREAD_GPGPU(queue); > > > > /* Allocate and inialize the structure itself */ > > TRY_ALLOC_NO_ERR (event, CALLOC(struct _cl_event)); @@ -75,7 +76,7 > > @@ cl_event cl_event_new(cl_context ctx, cl_command_queue queue, > > cl_command_type ty > > else { > > event->status = CL_QUEUED; > > if(cl_event_is_gpu_command_type(event->type)) > > - event->gpgpu_event = cl_gpgpu_event_new(queue->gpgpu); > > + event->gpgpu_event = cl_gpgpu_event_new(gpgpu); > > } > > cl_event_add_ref(event); //dec when complete > > event->user_cb = NULL; > > @@ -257,6 +258,7 @@ void cl_event_new_enqueue_callback(cl_event event, > > user_event *user_events, *u_ev; > > cl_command_queue queue = event->queue; > > cl_int i; > > + GET_QUEUE_THREAD_GPGPU(data->queue); > > > > /* Allocate and inialize the structure itself */ > > TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback)); @@ -336,7 +338,7 > > @@ void cl_event_new_enqueue_callback(cl_event event, > > } > > } > > if(data->queue != NULL && event->gpgpu_event != NULL) { > > - cl_gpgpu_event_pending(data->queue->gpgpu, event->gpgpu_event); > > + cl_gpgpu_event_pending(gpgpu, event->gpgpu_event); > > data->ptr = (void *)event->gpgpu_event; > > } > > cb->data = *data; > > diff --git a/src/cl_thread.c b/src/cl_thread.c new file mode 100644 > > index 0000000..fbad5c5 > > --- /dev/null > > +++ b/src/cl_thread.c > > @@ -0,0 +1,72 @@ > > +/* > > + * Copyright (c) 2012 Intel Corporation > > + * > > + * This library is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2 of the License, or (at your option) any later version. > > + * > > + * This library is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * Lesser General Public License for more details. > > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with this library. If not, see > > <http://www.gnu.org/licenses/>. > > + * > > + */ > > + > > +#include "cl_thread.h" > > +#include "cl_alloc.h" > > +#include "cl_utils.h" > > + > > +cl_gpgpu cl_get_thread_gpgpu(cl_command_queue queue) { > > + pthread_key_t* key = queue->thread_data; > > + cl_gpgpu gpgpu = pthread_getspecific(*key); > > + > > + if (!gpgpu) { > > + TRY_ALLOC_NO_ERR (gpgpu, cl_gpgpu_new(queue->ctx->drv)); } > > + > > + if (pthread_setspecific(*key, gpgpu)) { > > + cl_gpgpu_delete(gpgpu); > > + goto error; > > + } > > + > > +exit: > > + return gpgpu; > > +error: > > + pthread_setspecific(*key, NULL); > > + goto exit; > > +} > > + > > +static void thread_data_destructor(void *data) { > > + cl_gpgpu gpgpu = (cl_gpgpu)data; > > + cl_gpgpu_delete(gpgpu); > > +} > > + > > +/* Create the thread specific data. */ > > +void* cl_thread_data_create(void) > > +{ > > + int rc = 0; > > + > > + pthread_key_t *thread_specific_key = CALLOC(pthread_key_t); if > > + (thread_specific_key == NULL) > > + return NULL; > > + > > + rc = pthread_key_create(thread_specific_key, > > + thread_data_destructor); > > + > > + if (rc != 0) > > + return NULL; > > + > > + return thread_specific_key; > > +} > > + > > +/* The destructor for clean the thread specific data. */ void > > +cl_thread_data_destroy(void * data) { > > + pthread_key_t *thread_specific_key = (pthread_key_t *)data; > > + pthread_key_delete(*thread_specific_key); > > + cl_free(thread_specific_key); > > +} > > diff --git a/src/cl_thread.h b/src/cl_thread.h new file mode 100644 > > index 0000000..65f1bcf > > --- /dev/null > > +++ b/src/cl_thread.h > > @@ -0,0 +1,34 @@ > > +/* > > + * Copyright (c) 2012 Intel Corporation > > + * > > + * This library is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2 of the License, or (at your option) any later version. > > + * > > + * This library is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * Lesser General Public License for more details. > > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with this library. If not, see > > <http://www.gnu.org/licenses/>. > > + * > > + */ > > + > > +#ifndef __CL_THREAD_H__ > > +#define __CL_THREAD_H__ > > + > > +#include <pthread.h> > > +#include "cl_internals.h" > > +#include "cl_command_queue.h" > > + > > +/* Create the thread specific data. */ > > +void* cl_thread_data_create(void); > > + > > +/* The destructor for clean the thread specific data. */ void > > +cl_thread_data_destroy(void * data); > > + > > +/* Used to get the gpgpu struct of each thread. */ cl_gpgpu > > +cl_get_thread_gpgpu(cl_command_queue queue); #endif /* > > +__CL_THREAD_H__ */ > > diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index > > c87cba8..5e0bc19 100644 > > --- a/utests/CMakeLists.txt > > +++ b/utests/CMakeLists.txt > > @@ -53,7 +53,7 @@ set (utests_sources > > compiler_if_else.cpp > > compiler_integer_division.cpp > > compiler_integer_remainder.cpp > > - compiler_insert_vector.cpp > > + compiler_insert_vector.cpp > > compiler_lower_return0.cpp > > compiler_lower_return1.cpp > > compiler_lower_return2.cpp > > -- > > 1.7.9.5 > > > > > > > > _______________________________________________ > > Beignet mailing list > > Beignet@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet