I believe this is caused by the order in which the event timestamps are recorded.
We submit the NDRange first and only then record the queued timestamp, which is
wrong. I have already sent another patch, "Improve event execute function",
to fix this. Please try applying that patch on top of this one.
Thanks.

On Thu, Dec 22, 2016 at 06:41:49AM +0000, Pan, Xiuli wrote:
> Date: Thu, 22 Dec 2016 06:41:49 +0000
> From: "Pan, Xiuli" <xiuli....@intel.com>
> To: "junyan...@inbox.com" <junyan...@inbox.com>,
>  "beignet@lists.freedesktop.org" <beignet@lists.freedesktop.org>
> Subject: Re: [Beignet] [PATCH V4] Add profiling feature based on new event
>       implementation.
> 
> It seems there are still bugs.
> Here are some logs I got. The gen timestamps are printed in the function
> cl_event_update_timestamp_gen, and the final results are printed last as
> timestamp.
> 
> gen timestamp[0] is d88bddb30
> gen timestamp[1] is d88bde2b0
> run for 8 times
> gen timestamp[2] is d88bddae0 // It is smaller than timestamp[0], so we get a
> negative value
> gen timestamp[3] is d8f002390
> timestamp[2] is ffffffffffffffaf
> timestamp[3] is 642485f
> gen timestamp[0] is d8f03fab0
> gen timestamp[1] is d8f0400f0
> run for 9 times
> gen timestamp[2] is d8f03fd30
> gen timestamp[3] is d954687d0
> timestamp[2] is 27f
> timestamp[3] is 6428d1f
> gen timestamp[0] is d954a9d20
> gen timestamp[1] is d954aa450
> run for 10 times
> gen timestamp[2] is d954a9d20 // It is the same as timestamp[0], so we get -1
> gen timestamp[3] is d9b8df420
> timestamp[2] is ffffffffffffffff
> 
> 
> The overflow handler seems to have some problems.
> 
> -----Original Message-----
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of 
> junyan...@inbox.com
> Sent: Monday, December 19, 2016 7:24 PM
> To: beignet@lists.freedesktop.org
> Subject: [Beignet] [PATCH V4] Add profiling feature based on new event 
> implementation.
> 
> From: Junyan He <junyan...@intel.com>
> 
> TODO:
> In OpenCL 2.0, a new profiling item called CL_PROFILING_COMMAND_COMPLETE is
> introduced. It means that we need to record the time stamp at which all the child
> events created by the "Kernel enqueuing kernels" feature finish.
> This should be done after the "Kernel enqueuing kernels" feature is enabled.
> 
> V2:
> Update event time stamp before inserting to queue thread, avoid MT issue.
> 
> V3:
> Fixup overflow problem.
> 
> V4:
> Fixup overflow to 0xfffffffffffffffff problem.
> Just take ownership and release the event lock when calling the update timestamp
> function. The update timestamp function may make blocking system calls, so we
> should not hold the lock while calling it.
> 
> Signed-off-by: Junyan He <junyan...@intel.com>
> ---
>  src/cl_api.c                   |  51 ----------------
>  src/cl_api_event.c             |  41 +++++++++++++
>  src/cl_api_mem.c               |   9 +++
>  src/cl_base_object.c           |  29 ++++++---
>  src/cl_base_object.h           |  10 ++--
>  src/cl_command_queue_enqueue.c |   2 +
>  src/cl_driver.h                |   4 +-
>  src/cl_enqueue.c               |   9 ---
>  src/cl_event.c                 | 132 
> ++++++++++++++++++++++++++++++++---------
>  src/cl_event.h                 |  10 ++--
>  src/intel/intel_gpgpu.c        |  16 +++--
>  11 files changed, 195 insertions(+), 118 deletions(-)
> 
> diff --git a/src/cl_api.c b/src/cl_api.c index d7b5434..6a4f4ec 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -1312,57 +1312,6 @@ error:
>    return err;
>  }
>  
> -
> -cl_int
> -clGetEventProfilingInfo(cl_event             event,
> -                        cl_profiling_info    param_name,
> -                        size_t               param_value_size,
> -                        void *               param_value,
> -                        size_t *             param_value_size_ret)
> -{
> -  cl_int err = CL_SUCCESS;
> -  cl_ulong ret_val;
> -
> -  CHECK_EVENT(event);
> -  //cl_event_update_status(event, 0);
> -
> -  if (event->event_type == CL_COMMAND_USER ||
> -      !(event->queue->props & CL_QUEUE_PROFILING_ENABLE) ||
> -          event->status != CL_COMPLETE) {
> -    err = CL_PROFILING_INFO_NOT_AVAILABLE;
> -    goto error;
> -  }
> -
> -  if (param_value && param_value_size < sizeof(cl_ulong)) {
> -    err = CL_INVALID_VALUE;
> -    goto error;
> -  }
> -
> -  if (param_name == CL_PROFILING_COMMAND_QUEUED) {
> -    ret_val = event->queued_timestamp;
> -  } else if (param_name == CL_PROFILING_COMMAND_SUBMIT) {
> -    ret_val= event->queued_timestamp + 
> cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[1]);
> -  } else if (param_name == CL_PROFILING_COMMAND_START) {
> -    err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_START);
> -    ret_val = event->queued_timestamp + cl_event_get_start_timestamp(event);
> -  } else if (param_name == CL_PROFILING_COMMAND_END) {
> -    err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_END);
> -    ret_val =  event->queued_timestamp + cl_event_get_end_timestamp(event);
> -  } else {
> -    err = CL_INVALID_VALUE;
> -    goto error;
> -  }
> -
> -  if (err == CL_SUCCESS) {
> -    if (param_value)
> -      *(cl_ulong*)param_value = ret_val;
> -    if (param_value_size_ret)
> -      *param_value_size_ret = sizeof(cl_ulong);
> -  }
> -error:
> -  return err;
> -}
> -
>  cl_mem clCreatePipe (cl_context context,
>                       cl_mem_flags flags,
>                       cl_uint pipe_packet_size, diff --git 
> a/src/cl_api_event.c b/src/cl_api_event.c index 8f2b8e0..af1442a 100644
> --- a/src/cl_api_event.c
> +++ b/src/cl_api_event.c
> @@ -290,3 +290,44 @@ clGetEventInfo(cl_event event,
>    return cl_get_info_helper(src_ptr, src_size,
>                              param_value, param_value_size, 
> param_value_size_ret);  }
> +
> +cl_int
> +clGetEventProfilingInfo(cl_event event,
> +                        cl_profiling_info param_name,
> +                        size_t param_value_size,
> +                        void *param_value,
> +                        size_t *param_value_size_ret) {
> +  cl_ulong ret_val;
> +
> +  if (!CL_OBJECT_IS_EVENT(event)) {
> +    return CL_INVALID_EVENT;
> +  }
> +
> +  assert(event->event_type == CL_COMMAND_USER || event->queue != NULL);  
> + if (event->event_type == CL_COMMAND_USER ||
> +      !(event->queue->props & CL_QUEUE_PROFILING_ENABLE) ||
> +      cl_event_get_status(event) != CL_COMPLETE) {
> +    return CL_PROFILING_INFO_NOT_AVAILABLE;  }
> +
> +  if (param_value && param_value_size < sizeof(cl_ulong)) {
> +    return CL_INVALID_VALUE;
> +  }
> +
> +  if (param_name < CL_PROFILING_COMMAND_QUEUED ||
> +      param_name > CL_PROFILING_COMMAND_COMPLETE) {
> +    return CL_INVALID_VALUE;
> +  }
> +
> +  ret_val = event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED];  
> + if (ret_val == CL_EVENT_INVALID_TIMESTAMP) {
> +    return CL_INVALID_VALUE;
> +  }
> +
> +  if (param_value)
> +    *(cl_ulong *)param_value = ret_val;
> +  if (param_value_size_ret)
> +    *param_value_size_ret = sizeof(cl_ulong);
> +  return CL_SUCCESS;
> +}
> diff --git a/src/cl_api_mem.c b/src/cl_api_mem.c index 07be706..de18684 100644
> --- a/src/cl_api_mem.c
> +++ b/src/cl_api_mem.c
> @@ -318,6 +318,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
>  
>        ptr = data->ptr;
>        e->status = CL_COMPLETE; // Just set the status, no notify. No one 
> depend on us now.
> +      cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE);
>      } else {
>        err = cl_enqueue_handle(data, CL_SUBMITTED); // Submit to get the 
> address.
>        if (err != CL_SUCCESS) {
> @@ -410,6 +411,7 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue,
>        }
>  
>        e->status = CL_COMPLETE;
> +      cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE);
>      } else { // May need to wait some event to complete.
>        cl_command_queue_enqueue_event(command_queue, e);
>      }
> @@ -513,6 +515,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
>        }
>  
>        e->status = CL_COMPLETE; // Just set the status, no notify. No one 
> depend on us now.
> +      cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE);
>      } else {
>        cl_command_queue_enqueue_event(command_queue, e);
>      }
> @@ -616,6 +619,7 @@ clEnqueueWriteBuffer(cl_command_queue command_queue,
>        }
>  
>        e->status = CL_COMPLETE; // Just set the status, no notify. No one 
> depend on us now.
> +      cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE);
>      } else {
>        cl_command_queue_enqueue_event(command_queue, e);
>      }
> @@ -765,6 +769,7 @@ clEnqueueReadBufferRect(cl_command_queue command_queue,
>        }
>  
>        e->status = CL_COMPLETE; // Just set the status, no notify. No one 
> depend on us now.
> +      cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE);
>      } else {
>        cl_command_queue_enqueue_event(command_queue, e);
>      }
> @@ -916,6 +921,7 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue,
>        }
>  
>        e->status = CL_COMPLETE; // Just set the status, no notify. No one 
> depend on us now.
> +      cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE);
>      } else {
>        cl_command_queue_enqueue_event(command_queue, e);
>      }
> @@ -1601,6 +1607,7 @@ clEnqueueMapImage(cl_command_queue command_queue,
>  
>        ptr = data->ptr;
>        e->status = CL_COMPLETE; // Just set the status, no notify. No one 
> depend on us now.
> +      cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE);
>      } else {
>        err = cl_enqueue_handle(data, CL_SUBMITTED); // Submit to get the 
> address.
>        if (err != CL_SUCCESS) {
> @@ -1798,6 +1805,7 @@ clEnqueueReadImage(cl_command_queue command_queue,
>        }
>  
>        e->status = CL_COMPLETE; // Just set the status, no notify. No one 
> depend on us now.
> +      cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE);
>      } else {
>        cl_command_queue_enqueue_event(command_queue, e);
>      }
> @@ -1950,6 +1958,7 @@ clEnqueueWriteImage(cl_command_queue command_queue,
>        }
>  
>        e->status = CL_COMPLETE; // Just set the status, no notify. No one 
> depend on us now.
> +      cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE);
>      } else {
>        cl_command_queue_enqueue_event(command_queue, e);
>      }
> diff --git a/src/cl_base_object.c b/src/cl_base_object.c index 
> 00c4b35..cbbd872 100644
> --- a/src/cl_base_object.c
> +++ b/src/cl_base_object.c
> @@ -66,7 +66,7 @@ cl_object_destroy_base(cl_base_object obj)  }
>  
>  LOCAL cl_int
> -cl_object_take_ownership(cl_base_object obj, cl_int wait)
> +cl_object_take_ownership(cl_base_object obj, cl_int wait, cl_bool 
> +withlock)
>  {
>    pthread_t self;
>  
> @@ -74,21 +74,26 @@ cl_object_take_ownership(cl_base_object obj, cl_int wait)
>  
>    self = pthread_self();
>  
> -  pthread_mutex_lock(&obj->mutex);
> +  if (withlock == CL_FALSE)
> +    pthread_mutex_lock(&obj->mutex);
>  
>    if (pthread_equal(obj->owner, self)) { // Already get
> -    pthread_mutex_unlock(&obj->mutex);
> +    if (withlock == CL_FALSE)
> +      pthread_mutex_unlock(&obj->mutex);
>      return 1;
>    }
>  
>    if (pthread_equal(obj->owner, invalid_thread_id)) {
>      obj->owner = self;
> -    pthread_mutex_unlock(&obj->mutex);
> +
> +    if (withlock == CL_FALSE)
> +      pthread_mutex_unlock(&obj->mutex);
>      return 1;
>    }
>  
>    if (wait == 0) {
> -    pthread_mutex_unlock(&obj->mutex);
> +    if (withlock == CL_FALSE)
> +      pthread_mutex_unlock(&obj->mutex);
>      return 0;
>    }
>  
> @@ -97,21 +102,27 @@ cl_object_take_ownership(cl_base_object obj, cl_int wait)
>    }
>  
>    obj->owner = self;
> -  pthread_mutex_unlock(&obj->mutex);
> +
> +  if (withlock == CL_FALSE)
> +    pthread_mutex_unlock(&obj->mutex);
> +
>    return 1;
>  }
>  
>  LOCAL void
> -cl_object_release_ownership(cl_base_object obj)
> +cl_object_release_ownership(cl_base_object obj, cl_bool withlock)
>  {
>    assert(CL_OBJECT_IS_VALID(obj));
>  
> -  pthread_mutex_lock(&obj->mutex);
> +  if (withlock == CL_FALSE)
> +    pthread_mutex_lock(&obj->mutex);
> +
>    assert(pthread_equal(pthread_self(), obj->owner));
>    obj->owner = invalid_thread_id;
>    pthread_cond_broadcast(&obj->cond);
>  
> -  pthread_mutex_unlock(&obj->mutex);
> +  if (withlock == CL_FALSE)
> +    pthread_mutex_unlock(&obj->mutex);
>  }
>  
>  LOCAL void
> diff --git a/src/cl_base_object.h b/src/cl_base_object.h index 
> 4e643df..9d9dd94 100644
> --- a/src/cl_base_object.h
> +++ b/src/cl_base_object.h
> @@ -67,15 +67,17 @@ typedef struct _cl_base_object *cl_base_object;
>  
>  extern void cl_object_init_base(cl_base_object obj, cl_ulong magic);  extern 
> void cl_object_destroy_base(cl_base_object obj); -extern cl_int 
> cl_object_take_ownership(cl_base_object obj, cl_int wait); -extern void 
> cl_object_release_ownership(cl_base_object obj);
> +extern cl_int cl_object_take_ownership(cl_base_object obj, cl_int wait, 
> +cl_bool withlock); extern void 
> +cl_object_release_ownership(cl_base_object obj, cl_bool withlock);
>  extern void cl_object_wait_on_cond(cl_base_object obj);  extern void 
> cl_object_notify_cond(cl_base_object obj);
>  
>  #define CL_OBJECT_INIT_BASE(obj, magic) 
> (cl_object_init_base((cl_base_object)obj, magic))  #define 
> CL_OBJECT_DESTROY_BASE(obj) (cl_object_destroy_base((cl_base_object)obj))
> -#define CL_OBJECT_TAKE_OWNERSHIP(obj, wait) 
> (cl_object_take_ownership((cl_base_object)obj, wait)) -#define 
> CL_OBJECT_RELEASE_OWNERSHIP(obj) 
> (cl_object_release_ownership((cl_base_object)obj))
> +#define CL_OBJECT_TAKE_OWNERSHIP(obj, wait) 
> +(cl_object_take_ownership((cl_base_object)obj, wait, CL_FALSE)) #define 
> +CL_OBJECT_RELEASE_OWNERSHIP(obj) 
> +(cl_object_release_ownership((cl_base_object)obj, CL_FALSE)) #define 
> +CL_OBJECT_TAKE_OWNERSHIP_WITHLOCK(obj, wait) 
> +(cl_object_take_ownership((cl_base_object)obj, wait, CL_TRUE)) #define 
> +CL_OBJECT_RELEASE_OWNERSHIP_WITHLOCK(obj) 
> +(cl_object_release_ownership((cl_base_object)obj, CL_TRUE))
>  #define CL_OBJECT_WAIT_ON_COND(obj) 
> (cl_object_wait_on_cond((cl_base_object)obj))
>  #define CL_OBJECT_NOTIFY_COND(obj) 
> (cl_object_notify_cond((cl_base_object)obj))
>  
> diff --git a/src/cl_command_queue_enqueue.c b/src/cl_command_queue_enqueue.c 
> index 32545b3..cf9ee3f 100644
> --- a/src/cl_command_queue_enqueue.c
> +++ b/src/cl_command_queue_enqueue.c
> @@ -135,6 +135,8 @@ LOCAL void
>  cl_command_queue_enqueue_event(cl_command_queue queue, cl_event event)  {
>    CL_OBJECT_INC_REF(event);
> +  cl_event_update_timestamp(event, CL_QUEUED, event->status);
> +
>    assert(CL_OBJECT_IS_COMMAND_QUEUE(queue));
>    CL_OBJECT_LOCK(queue);
>    assert(queue->worker.quit == CL_FALSE); diff --git a/src/cl_driver.h 
> b/src/cl_driver.h index b45e2fb..9b2fd32 100644
> --- a/src/cl_driver.h
> +++ b/src/cl_driver.h
> @@ -262,11 +262,11 @@ typedef void (cl_gpgpu_event_delete_cb)(cl_gpgpu_event);
>  extern cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete;
>  
>  /* Get a event time stamp */
> -typedef void (cl_gpgpu_event_get_exec_timestamp_cb)(cl_gpgpu, 
> cl_gpgpu_event, int, uint64_t*);
> +typedef void (cl_gpgpu_event_get_exec_timestamp_cb)(cl_gpgpu, int, 
> +uint64_t*);
>  extern cl_gpgpu_event_get_exec_timestamp_cb 
> *cl_gpgpu_event_get_exec_timestamp;
>  
>  /* Get current GPU time stamp */
> -typedef void (cl_gpgpu_event_get_gpu_cur_timestamp_cb)(cl_gpgpu, uint64_t*);
> +typedef void (cl_gpgpu_event_get_gpu_cur_timestamp_cb)(cl_driver, 
> +uint64_t*);
>  extern cl_gpgpu_event_get_gpu_cur_timestamp_cb 
> *cl_gpgpu_event_get_gpu_cur_timestamp;
>  
>  /* Get current batch buffer handle */
> diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c index fbcd7b6..59605f9 100644
> --- a/src/cl_enqueue.c
> +++ b/src/cl_enqueue.c
> @@ -572,9 +572,6 @@ cl_enqueue_ndrange(enqueue_data *data, cl_int status)
>      void *batch_buf = cl_gpgpu_ref_batch_buf(data->gpgpu);
>      cl_gpgpu_sync(batch_buf);
>      cl_gpgpu_unref_batch_buf(batch_buf);
> -    /* Finished, we can release the gpgpu now. */
> -    cl_gpgpu_delete(data->gpgpu);
> -    data->gpgpu = NULL;
>    }
>  
>    return err;
> @@ -626,12 +623,6 @@ cl_enqueue_delete(enqueue_data *data)  LOCAL cl_int  
> cl_enqueue_handle(enqueue_data *data, cl_int status)  {
> -  /* if need profiling, add the submit timestamp here. */
> -  //  if (event && event->event_type != CL_COMMAND_USER &&
> -  //      event->queue->props & CL_QUEUE_PROFILING_ENABLE) {
> -  //    cl_event_get_timestamp(event, CL_PROFILING_COMMAND_SUBMIT);
> -  //  }
> -
>    switch (data->type) {
>    case EnqueueReturnSuccesss:
>      return CL_SUCCESS;
> diff --git a/src/cl_event.c b/src/cl_event.c index 0804dbd..212f184 100644
> --- a/src/cl_event.c
> +++ b/src/cl_event.c
> @@ -23,46 +23,107 @@
>  #include <string.h>
>  #include <stdio.h>
>  
> -LOCAL cl_int
> -cl_event_get_timestamp(cl_event event, cl_profiling_info param_name)
> +// TODO: Need to move it to some device related file later.
> +static void
> +cl_event_update_timestamp_gen(cl_event event, cl_int status)
>  {
> -  // TODO:
> -  return CL_INVALID_VALUE;
> +  cl_ulong ts = 0;
> +
> +  if ((event->exec_data.type == EnqueueCopyBufferRect) ||
> +      (event->exec_data.type == EnqueueCopyBuffer) ||
> +      (event->exec_data.type == EnqueueCopyImage) ||
> +      (event->exec_data.type == EnqueueCopyBufferToImage) ||
> +      (event->exec_data.type == EnqueueCopyImageToBuffer) ||
> +      (event->exec_data.type == EnqueueNDRangeKernel) ||
> +      (event->exec_data.type == EnqueueFillBuffer) ||
> +      (event->exec_data.type == EnqueueFillImage)) {
> +
> +    if (status == CL_QUEUED || status == CL_SUBMITTED) {
> +      cl_gpgpu_event_get_gpu_cur_timestamp(event->queue->ctx->drv, 
> + &ts);
> +
> +      if (ts == CL_EVENT_INVALID_TIMESTAMP)
> +        ts++;
> +      event->timestamp[CL_QUEUED - status] = ts;
> +      return;
> +    } else if (status == CL_RUNNING) {
> +      assert(event->exec_data.gpgpu);
> +      return; // Wait for the event complete and get run and complete then.
> +    } else {
> +      assert(event->exec_data.gpgpu);
> +      cl_gpgpu_event_get_exec_timestamp(event->exec_data.gpgpu, 0, &ts);
> +      if (ts == CL_EVENT_INVALID_TIMESTAMP)
> +        ts++;
> +      event->timestamp[2] = ts;
> +      cl_gpgpu_event_get_exec_timestamp(event->exec_data.gpgpu, 1, &ts);
> +      if (ts == CL_EVENT_INVALID_TIMESTAMP)
> +        ts++;
> +      event->timestamp[3] = ts;
> +      return;
> +    }
> +  } else {
> +    cl_gpgpu_event_get_gpu_cur_timestamp(event->queue->ctx->drv, &ts);
> +    if (ts == CL_EVENT_INVALID_TIMESTAMP)
> +      ts++;
> +    event->timestamp[CL_QUEUED - status] = ts;
> +    return;
> +  }
>  }
>  
> -LOCAL cl_ulong
> -cl_event_get_timestamp_delta(cl_ulong start_timestamp, cl_ulong 
> end_timestamp)
> +LOCAL void
> +cl_event_update_timestamp(cl_event event, cl_int from, cl_int to)
>  {
> -  cl_ulong ret_val;
> +  int i;
> +  cl_bool re_cal = CL_FALSE;
> +  cl_ulong ts[4];
>  
> -  if (end_timestamp > start_timestamp) {
> -    ret_val = end_timestamp - start_timestamp;
> -  } else {
> -    /*if start time stamp is greater than end timstamp then set ret value to 
> max*/
> -    ret_val = ((cl_ulong)1 << 32);
> -  }
> +  assert(from >= to);
> +  assert(from >= CL_COMPLETE || from <= CL_QUEUED);  assert(to >= 
> + CL_COMPLETE || to <= CL_QUEUED);
>  
> -  return ret_val;
> -}
> +  if (event->event_type == CL_COMMAND_USER)
> +    return;
>  
> -LOCAL cl_ulong
> -cl_event_get_start_timestamp(cl_event event) -{
> -  cl_ulong ret_val;
> +  assert(event->queue);
> +  if ((event->queue->props & CL_QUEUE_PROFILING_ENABLE) == 0)
> +    return;
>  
> -  ret_val = cl_event_get_timestamp_delta(event->timestamp[0], 
> event->timestamp[2]);
> +  i = CL_QUEUED - from;
> +  if (event->timestamp[i] == CL_EVENT_INVALID_TIMESTAMP)
> +    cl_event_update_timestamp_gen(event, from);  i++;
>  
> -  return ret_val;
> -}
> +  for (; i <= CL_QUEUED - to; i++) {
> +    cl_event_update_timestamp_gen(event, CL_QUEUED - i);  }
>  
> -LOCAL cl_ulong
> -cl_event_get_end_timestamp(cl_event event) -{
> -  cl_ulong ret_val;
> +  if (to == CL_COMPLETE) {
> +    // TODO: Need to set the CL_PROFILING_COMMAND_COMPLETE when enable child 
> enqueue.
> +    // Just a duplicate of event complete time now.
> +    event->timestamp[4] = event->timestamp[3];
>  
> -  ret_val = cl_event_get_timestamp_delta(event->timestamp[0], 
> event->timestamp[3]);
> +    /* If timestamp overflow, set queued time to 0 and re-calculate. */
> +    for (i = 0; i < 4; i++) {
> +      if (event->timestamp[i + 1] < event->timestamp[i]) {
> +        re_cal = CL_TRUE;
> +        break;
> +      }
> +    }
> +
> +    if (re_cal) {
> +      for (i = 3; i >= 0; i--) {
> +        if (event->timestamp[i + 1] < event->timestamp[i]) { //overflow
> +          ts[i] = event->timestamp[i + 1] + (CL_EVENT_INVALID_TIMESTAMP - 
> event->timestamp[i]);
> +        } else {
> +          ts[i] = event->timestamp[i + 1] - event->timestamp[i];
> +        }
> +      }
>  
> -  return ret_val;
> +      event->timestamp[0] = 0;
> +      for (i = 1; i < 5; i++) {
> +        event->timestamp[i] = event->timestamp[i - 1] + ts[i - 1];
> +      }
> +    }
> +  }
>  }
>  
>  LOCAL void
> @@ -88,6 +149,7 @@ static cl_event
>  cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type,
>               cl_uint num_events, cl_event *event_list)  {
> +  int i;
>    cl_event e = cl_calloc(1, sizeof(_cl_event));
>    if (e == NULL)
>      return NULL;
> @@ -115,6 +177,10 @@ cl_event_new(cl_context ctx, cl_command_queue queue, 
> cl_command_type type,
>  
>    e->depend_events = event_list;
>    e->depend_event_num = num_events;
> +  for (i = 0; i < 4; i++) {
> +    e->timestamp[i] = CL_EVENT_INVALID_TIMESTAMP;  }
> +
>    return e;
>  }
>  
> @@ -317,6 +383,16 @@ cl_event_set_status(cl_event event, cl_int status)
>      return CL_INVALID_OPERATION;
>    }
>  
> +  if (status >= CL_COMPLETE && !CL_EVENT_IS_USER(event) &&
> +      (event->queue->props & CL_QUEUE_PROFILING_ENABLE) != 0) {
> +    // Call update_timestamp without event lock.
> +    CL_OBJECT_TAKE_OWNERSHIP_WITHLOCK(event, 1);
> +    CL_OBJECT_UNLOCK(event);
> +    cl_event_update_timestamp(event, event->status, status);
> +    CL_OBJECT_LOCK(event);
> +    CL_OBJECT_RELEASE_OWNERSHIP_WITHLOCK(event);
> +  }
> +
>    event->status = status;
>  
>    /* Call all the callbacks. */
> diff --git a/src/cl_event.h b/src/cl_event.h index f67299c..9df5ab6 100644
> --- a/src/cl_event.h
> +++ b/src/cl_event.h
> @@ -48,8 +48,7 @@ typedef struct _cl_event {
>    cl_uint depend_event_num;   /* The depend events number. */
>    list_head callbacks;        /* The events The event callback functions */
>    list_head enqueue_node;     /* The node in the enqueue list. */
> -  cl_ulong timestamp[4];      /* The time stamps for profiling. */
> -  cl_ulong queued_timestamp;
> +  cl_ulong timestamp[5];      /* The time stamps for profiling. */
>    enqueue_data exec_data; /* Context for execute this event. */  } _cl_event;
>  
> @@ -62,6 +61,8 @@ typedef struct _cl_event {  #define CL_EVENT_IS_BARRIER(E) 
> (E->event_type == CL_COMMAND_BARRIER)  #define CL_EVENT_IS_USER(E) 
> (E->event_type == CL_COMMAND_USER)
>  
> +#define CL_EVENT_INVALID_TIMESTAMP 0xFFFFFFFFFFFFFFFF
> +
>  /* Create a new event object */
>  extern cl_event cl_event_create(cl_context ctx, cl_command_queue queue, 
> cl_uint num_events,
>                                  const cl_event *event_list, cl_command_type 
> type, cl_int *errcode_ret); @@ -78,11 +79,8 @@ extern cl_int 
> cl_event_set_callback(cl_event event, cl_int exec_type,
>                                      cl_event_notify_cb pfn_notify, void 
> *user_data);  extern cl_int cl_event_wait_for_events_list(cl_uint num_events, 
> const cl_event *event_list);  extern cl_int 
> cl_event_wait_for_event_ready(cl_event event); -extern cl_ulong 
> cl_event_get_timestamp_delta(cl_ulong start_timestamp, cl_ulong 
> end_timestamp); -extern cl_ulong cl_event_get_start_timestamp(cl_event 
> event); -extern cl_ulong cl_event_get_end_timestamp(cl_event event); -extern 
> cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);  
> extern cl_event cl_event_create_marker_or_barrier(cl_command_queue queue, 
> cl_uint num_events_in_wait_list,
>                                                    const cl_event 
> *event_wait_list, cl_bool is_barrier,
>                                                    cl_int* error);
> +extern void cl_event_update_timestamp(cl_event event, cl_int 
> +from_status, cl_int to_status);
>  #endif /* __CL_EVENT_H__ */
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 
> d56d35d..e35b9d1 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -2277,10 +2277,10 @@ intel_gpgpu_read_ts_reg_baytrail(drm_intel_bufmgr 
> *bufmgr)
>  
>  /* We want to get the current time of GPU. */  static void
> -intel_gpgpu_event_get_gpu_cur_timestamp(intel_gpgpu_t* gpgpu, uint64_t* 
> ret_ts)
> +intel_gpgpu_event_get_gpu_cur_timestamp(intel_driver_t* gen_driver, 
> +uint64_t* ret_ts)
>  {
>    uint64_t result = 0;
> -  drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr;
> +  drm_intel_bufmgr *bufmgr = gen_driver->bufmgr;
>  
>    /* Get the ts that match the bspec */
>    result = intel_gpgpu_read_ts_reg(bufmgr); @@ -2292,15 +2292,13 @@ 
> intel_gpgpu_event_get_gpu_cur_timestamp(intel_gpgpu_t* gpgpu, uint64_t* 
> ret_ts)
>  
>  /* Get the GPU execute time. */
>  static void
> -intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, intel_event_t 
> *event,
> -                                  int index, uint64_t* ret_ts)
> +intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, int index, 
> +uint64_t* ret_ts)
>  {
>    uint64_t result = 0;
> -
> -  assert(event->ts_buf != NULL);
> +  assert(gpgpu->time_stamp_b.bo);
>    assert(index == 0 || index == 1);
> -  drm_intel_gem_bo_map_gtt(event->ts_buf);
> -  uint64_t* ptr = event->ts_buf->virtual;
> +  drm_intel_gem_bo_map_gtt(gpgpu->time_stamp_b.bo);
> +  uint64_t* ptr = gpgpu->time_stamp_b.bo->virtual;
>    result = ptr[index];
>  
>    /* According to BSpec, the timestamp counter should be 36 bits, @@ -2311,7 
> +2309,7 @@ intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, 
> intel_event_t *event,
>    result = (result & 0x0FFFFFFFF) * 80; //convert to nanoseconds
>    *ret_ts = result;
>  
> -  drm_intel_gem_bo_unmap_gtt(event->ts_buf);
> +  drm_intel_gem_bo_unmap_gtt(gpgpu->time_stamp_b.bo);
>  }
>  
>  static int
> --
> 2.7.4
> 
> 
> 
> _______________________________________________
> Beignet mailing list
> Beignet@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet


_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to