Please ignore my previous comment. I misunderstood: I thought the offset would
affect the gpgpu thread dispatching, which actually doesn't work that way.

So this patch LGTM; I will push it later. Thanks.

On Mon, Sep 23, 2013 at 02:04:08PM +0800, Yang Rong wrote:
> Set the global offset to 0 in the walker, and add the global offset in get_global_id.
> 
> Signed-off-by: Yang Rong <rong.r.y...@intel.com>
> ---
>  backend/src/ocl_stdlib.tmpl.h | 2 +-
>  src/cl_api.c                  | 7 -------
>  src/intel/intel_gpgpu.c       | 6 +++---
>  3 files changed, 4 insertions(+), 11 deletions(-)
> 
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index 9b76ba1..4921ee4 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -588,7 +588,7 @@ DECL_PUBLIC_WORK_ITEM_FN(get_num_groups, 1)
>  #undef DECL_PUBLIC_WORK_ITEM_FN
>  
>  INLINE uint get_global_id(uint dim) {
> -  return get_local_id(dim) + get_local_size(dim) * get_group_id(dim);
> +  return get_local_id(dim) + get_local_size(dim) * get_group_id(dim) + 
> get_global_offset(dim);
>  }
>  
>  /////////////////////////////////////////////////////////////////////////////
> diff --git a/src/cl_api.c b/src/cl_api.c
> index b60160b..c19b80a 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -2264,19 +2264,12 @@ clEnqueueNDRangeKernel(cl_command_queue  
> command_queue,
>      goto error;
>    }
>  
> -  /* Check offset values. We add a non standard restriction. The offsets must
> -   * also be evenly divided by the local sizes
> -   */
>    if (global_work_offset != NULL)
>      for (i = 0; i < work_dim; ++i) {
>        if (UNLIKELY(~0LL - global_work_offset[i] > global_work_size[i])) {
>          err = CL_INVALID_GLOBAL_OFFSET;
>          goto error;
>        }
> -      if (UNLIKELY(local_work_size != NULL && global_work_offset[i] % 
> local_work_size[i])) {
> -        err = CL_INVALID_GLOBAL_OFFSET;
> -        goto error;
> -      }
>      }
>  
>    /* Local sizes must be non-null and divide global sizes */
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index 7b82b76..44f44ef 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -886,11 +886,11 @@ intel_gpgpu_walker(intel_gpgpu_t *gpgpu,
>      OUT_BATCH(gpgpu->batch, (1 << 30) | (thread_n-1)); /* SIMD16 | thread 
> max */
>    else
>      OUT_BATCH(gpgpu->batch, (0 << 30) | (thread_n-1)); /* SIMD8  | thread 
> max */
> -  OUT_BATCH(gpgpu->batch, global_wk_off[0]);
> +  OUT_BATCH(gpgpu->batch, 0);
>    OUT_BATCH(gpgpu->batch, global_wk_dim[0]);
> -  OUT_BATCH(gpgpu->batch, global_wk_off[1]);
> +  OUT_BATCH(gpgpu->batch, 0);
>    OUT_BATCH(gpgpu->batch, global_wk_dim[1]);
> -  OUT_BATCH(gpgpu->batch, global_wk_off[2]);
> +  OUT_BATCH(gpgpu->batch, 0);
>    OUT_BATCH(gpgpu->batch, global_wk_dim[2]);
>    OUT_BATCH(gpgpu->batch, right_mask);
>    OUT_BATCH(gpgpu->batch, ~0x0);                     /* we always set height 
> as 1, so set bottom mask as all 1*/
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to