[PATCH libdrm 1/2] amdgpu: add the interface of waiting multiple fences

2017-04-13 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Signed-off-by: Junwei Zhang 
[v2: allow returning the first signaled fence index]
Signed-off-by: monk.liu 
[v3:
 - cleanup *status setting
 - fix amdgpu symbols check]
Signed-off-by: Nicolai Hähnle 
Reviewed-by: Christian König  (v1)
Reviewed-by: Jammy Zhou  (v1)
---
 amdgpu/amdgpu-symbol-check |  1 +
 amdgpu/amdgpu.h| 23 ++
 amdgpu/amdgpu_cs.c | 74 ++
 3 files changed, 98 insertions(+)

diff --git a/amdgpu/amdgpu-symbol-check b/amdgpu/amdgpu-symbol-check
index 4d1ae65..81ef9b4 100755
--- a/amdgpu/amdgpu-symbol-check
+++ b/amdgpu/amdgpu-symbol-check
@@ -26,20 +26,21 @@ amdgpu_bo_va_op_raw
 amdgpu_bo_wait_for_idle
 amdgpu_create_bo_from_user_mem
 amdgpu_cs_create_semaphore
 amdgpu_cs_ctx_create
 amdgpu_cs_ctx_free
 amdgpu_cs_destroy_semaphore
 amdgpu_cs_query_fence_status
 amdgpu_cs_query_reset_state
 amdgpu_cs_signal_semaphore
 amdgpu_cs_submit
+amdgpu_cs_wait_fences
 amdgpu_cs_wait_semaphore
 amdgpu_device_deinitialize
 amdgpu_device_initialize
 amdgpu_get_marketing_name
 amdgpu_query_buffer_size_alignment
 amdgpu_query_crtc_from_id
 amdgpu_query_firmware_version
 amdgpu_query_gds_info
 amdgpu_query_gpu_info
 amdgpu_query_heap_info
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 55884b2..fdea905 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -900,20 +900,43 @@ int amdgpu_cs_submit(amdgpu_context_handle context,
  *  returned in the case if submission was completed or timeout error
  *  code.
  *
  * \sa amdgpu_cs_submit()
 */
 int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
 uint64_t timeout_ns,
 uint64_t flags,
 uint32_t *expired);
 
+/**
+ *  Wait for multiple fences
+ *
+ * \param   fences  - \c [in] The fence array to wait
+ * \param   fence_count - \c [in] The fence count
+ * \param   wait_all- \c [in] If true, wait all fences to be signaled,
+ *otherwise, wait at least one fence
+ * \param   timeout_ns  - \c [in] The timeout to wait, in nanoseconds
+ * \param   status  - \c [out] '1' for signaled, '0' for timeout
+ * \param   first   - \c [out] the index of the first signaled fence from 
@fences
+ *
+ * \return  0 on success
+ *  <0 - Negative POSIX Error code
+ *
+ * \noteCurrently it supports only one amdgpu_device. All fences come from
+ *  the same amdgpu_device with the same fd.
+*/
+int amdgpu_cs_wait_fences(struct amdgpu_cs_fence *fences,
+ uint32_t fence_count,
+ bool wait_all,
+ uint64_t timeout_ns,
+ uint32_t *status, uint32_t *first);
+
 /*
  * Query / Info API
  *
 */
 
 /**
  * Query allocation size alignments
  *
  * UMD should query information about GPU VM MC size alignments requirements
  * to be able correctly choose required allocation size and implement
diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
index fb5b3a8..707e6d1 100644
--- a/amdgpu/amdgpu_cs.c
+++ b/amdgpu/amdgpu_cs.c
@@ -436,20 +436,94 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence 
*fence,
r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type,
fence->ip_instance, fence->ring,
fence->fence, timeout_ns, flags, &busy);
 
if (!r && !busy)
*expired = true;
 
return r;
 }
 
+static int amdgpu_ioctl_wait_fences(struct amdgpu_cs_fence *fences,
+   uint32_t fence_count,
+   bool wait_all,
+   uint64_t timeout_ns,
+   uint32_t *status,
+   uint32_t *first)
+{
+   struct drm_amdgpu_fence *drm_fences;
+   amdgpu_device_handle dev = fences[0].context->dev;
+   union drm_amdgpu_wait_fences args;
+   int r;
+   uint32_t i;
+
+   drm_fences = alloca(sizeof(struct drm_amdgpu_fence) * fence_count);
+   for (i = 0; i < fence_count; i++) {
+   drm_fences[i].ctx_id = fences[i].context->id;
+   drm_fences[i].ip_type = fences[i].ip_type;
+   drm_fences[i].ip_instance = fences[i].ip_instance;
+   drm_fences[i].ring = fences[i].ring;
+   drm_fences[i].seq_no = fences[i].fence;
+   }
+
+   memset(&args, 0, sizeof(args));
+   args.in.fences = (uint64_t)(uintptr_t)drm_fences;
+   args.in.fence_count = fence_count;
+   args.in.wait_all = wait_all;
+   args.in.timeout_ns = amdgpu_cs_calculate_timeout(timeout_ns);
+
+   r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_FENCES, &args);
+   if (r)
+   return -errno;
+
+   *status = args.out.status;
+
+   if (first)
+   *first = args.out.first_signaled;
+
+   return 0;
+}
+
+int amdgpu_cs

Re: [PATCH libdrm 1/2] amdgpu: add the interface of waiting multiple fences

2017-04-18 Thread Edward O'Callaghan


On 04/14/2017 12:47 AM, Nicolai Hähnle wrote:
> From: Nicolai Hähnle 
> 
> Signed-off-by: Junwei Zhang 
> [v2: allow returning the first signaled fence index]
> Signed-off-by: monk.liu 
> [v3:
>  - cleanup *status setting
>  - fix amdgpu symbols check]
> Signed-off-by: Nicolai Hähnle 
> Reviewed-by: Christian König  (v1)
> Reviewed-by: Jammy Zhou  (v1)
> ---
>  amdgpu/amdgpu-symbol-check |  1 +
>  amdgpu/amdgpu.h| 23 ++
>  amdgpu/amdgpu_cs.c | 74 
> ++
>  3 files changed, 98 insertions(+)
> 
> diff --git a/amdgpu/amdgpu-symbol-check b/amdgpu/amdgpu-symbol-check
> index 4d1ae65..81ef9b4 100755
> --- a/amdgpu/amdgpu-symbol-check
> +++ b/amdgpu/amdgpu-symbol-check
> @@ -26,20 +26,21 @@ amdgpu_bo_va_op_raw
>  amdgpu_bo_wait_for_idle
>  amdgpu_create_bo_from_user_mem
>  amdgpu_cs_create_semaphore
>  amdgpu_cs_ctx_create
>  amdgpu_cs_ctx_free
>  amdgpu_cs_destroy_semaphore
>  amdgpu_cs_query_fence_status
>  amdgpu_cs_query_reset_state
>  amdgpu_cs_signal_semaphore
>  amdgpu_cs_submit
> +amdgpu_cs_wait_fences
>  amdgpu_cs_wait_semaphore
>  amdgpu_device_deinitialize
>  amdgpu_device_initialize
>  amdgpu_get_marketing_name
>  amdgpu_query_buffer_size_alignment
>  amdgpu_query_crtc_from_id
>  amdgpu_query_firmware_version
>  amdgpu_query_gds_info
>  amdgpu_query_gpu_info
>  amdgpu_query_heap_info
> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
> index 55884b2..fdea905 100644
> --- a/amdgpu/amdgpu.h
> +++ b/amdgpu/amdgpu.h
> @@ -900,20 +900,43 @@ int amdgpu_cs_submit(amdgpu_context_handle context,
>   *returned in the case if submission was completed or timeout error
>   *code.
>   *
>   * \sa amdgpu_cs_submit()
>  */
>  int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
>uint64_t timeout_ns,
>uint64_t flags,
>uint32_t *expired);
>  
> +/**
> + *  Wait for multiple fences
> + *
> + * \param   fences  - \c [in] The fence array to wait
> + * \param   fence_count - \c [in] The fence count
> + * \param   wait_all- \c [in] If true, wait all fences to be signaled,
> + *otherwise, wait at least one fence
> + * \param   timeout_ns  - \c [in] The timeout to wait, in nanoseconds
> + * \param   status  - \c [out] '1' for signaled, '0' for timeout
> + * \param   first   - \c [out] the index of the first signaled fence 
> from @fences
> + *
> + * \return  0 on success
> + *  <0 - Negative POSIX Error code
> + *
> + * \noteCurrently it supports only one amdgpu_device. All fences come 
> from
> + *  the same amdgpu_device with the same fd.
> +*/
> +int amdgpu_cs_wait_fences(struct amdgpu_cs_fence *fences,
> +   uint32_t fence_count,
> +   bool wait_all,
> +   uint64_t timeout_ns,
> +   uint32_t *status, uint32_t *first);
> +
>  /*
>   * Query / Info API
>   *
>  */
>  
>  /**
>   * Query allocation size alignments
>   *
>   * UMD should query information about GPU VM MC size alignments requirements
>   * to be able correctly choose required allocation size and implement
> diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
> index fb5b3a8..707e6d1 100644
> --- a/amdgpu/amdgpu_cs.c
> +++ b/amdgpu/amdgpu_cs.c
> @@ -436,20 +436,94 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence 
> *fence,
>   r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type,
>   fence->ip_instance, fence->ring,
>   fence->fence, timeout_ns, flags, &busy);
>  
>   if (!r && !busy)
>   *expired = true;
>  
>   return r;
>  }
>  
> +static int amdgpu_ioctl_wait_fences(struct amdgpu_cs_fence *fences,
> + uint32_t fence_count,
> + bool wait_all,
> + uint64_t timeout_ns,
> + uint32_t *status,
> + uint32_t *first)
> +{
> + struct drm_amdgpu_fence *drm_fences;
> + amdgpu_device_handle dev = fences[0].context->dev;
> + union drm_amdgpu_wait_fences args;
> + int r;
> + uint32_t i;
> +
> + drm_fences = alloca(sizeof(struct drm_amdgpu_fence) * fence_count);
> + for (i = 0; i < fence_count; i++) {
> + drm_fences[i].ctx_id = fences[i].context->id;
> + drm_fences[i].ip_type = fences[i].ip_type;
> + drm_fences[i].ip_instance = fences[i].ip_instance;
> + drm_fences[i].ring = fences[i].ring;
> + drm_fences[i].seq_no = fences[i].fence;
> + }
> +
> + memset(&args, 0, sizeof(args));
> + args.in.fences = (uint64_t)(uintptr_t)drm_fences;
> + args.in.fence_count = fence_count;
> + args.in.wait_all = wait_all;
> + args.in.timeout_ns = amdgpu_cs_calculate_timeout(timeout_ns);
> +
> + r =

Re: [PATCH libdrm 1/2] amdgpu: add the interface of waiting multiple fences

2017-04-18 Thread Nicolai Hähnle

On 18.04.2017 17:47, Edward O'Callaghan wrote:

On 04/14/2017 12:47 AM, Nicolai Hähnle wrote:

From: Nicolai Hähnle 

Signed-off-by: Junwei Zhang 
[v2: allow returning the first signaled fence index]
Signed-off-by: monk.liu 
[v3:
 - cleanup *status setting
 - fix amdgpu symbols check]
Signed-off-by: Nicolai Hähnle 
Reviewed-by: Christian König  (v1)
Reviewed-by: Jammy Zhou  (v1)
---
 amdgpu/amdgpu-symbol-check |  1 +
 amdgpu/amdgpu.h| 23 ++
 amdgpu/amdgpu_cs.c | 74 ++
 3 files changed, 98 insertions(+)


[snip]

+static int amdgpu_ioctl_wait_fences(struct amdgpu_cs_fence *fences,
+   uint32_t fence_count,
+   bool wait_all,
+   uint64_t timeout_ns,
+   uint32_t *status,
+   uint32_t *first)
+{
+   struct drm_amdgpu_fence *drm_fences;
+   amdgpu_device_handle dev = fences[0].context->dev;
+   union drm_amdgpu_wait_fences args;
+   int r;
+   uint32_t i;
+
+   drm_fences = alloca(sizeof(struct drm_amdgpu_fence) * fence_count);
+   for (i = 0; i < fence_count; i++) {
+   drm_fences[i].ctx_id = fences[i].context->id;
+   drm_fences[i].ip_type = fences[i].ip_type;
+   drm_fences[i].ip_instance = fences[i].ip_instance;
+   drm_fences[i].ring = fences[i].ring;
+   drm_fences[i].seq_no = fences[i].fence;
+   }
+
+   memset(&args, 0, sizeof(args));
+   args.in.fences = (uint64_t)(uintptr_t)drm_fences;
+   args.in.fence_count = fence_count;
+   args.in.wait_all = wait_all;
+   args.in.timeout_ns = amdgpu_cs_calculate_timeout(timeout_ns);
+
+   r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_FENCES, &args);
+   if (r)
+   return -errno;


Hi Nicolai,

you will leak drm_fences here on the error branch.


It's an alloca, so it's automatically freed when the function returns.



+
+   *status = args.out.status;
+
+   if (first)
+   *first = args.out.first_signaled;
+
+   return 0;
+}
+
+int amdgpu_cs_wait_fences(struct amdgpu_cs_fence *fences,
+ uint32_t fence_count,
+ bool wait_all,
+ uint64_t timeout_ns,
+ uint32_t *status,
+ uint32_t *first)
+{
+   uint32_t i;
+   int r;


no need for an intermediate ret, just return amdgpu_ioctl_wait_fences()
directly?


Good point, I'll change that before I push.



+
+   /* Sanity check */
+   if (NULL == fences)
+   return -EINVAL;
+   if (NULL == status)
+   return -EINVAL;
+   if (fence_count <= 0)
+   return -EINVAL;


may as well combine these branches?

if (!fences || !status || !fence_count)
return -EINVAL;

as fence_count is unsigned.


Yeah, that makes some sense, but I decided to keep the separate 
if-statements because other functions are written like this as well.


Thanks,
Nicolai





Kind Regards,
Edward.


+   for (i = 0; i < fence_count; i++) {
+   if (NULL == fences[i].context)
+   return -EINVAL;
+   if (fences[i].ip_type >= AMDGPU_HW_IP_NUM)
+   return -EINVAL;
+   if (fences[i].ring >= AMDGPU_CS_MAX_RINGS)
+   return -EINVAL;
+   }
+
+   *status = 0;
+
+   r = amdgpu_ioctl_wait_fences(fences, fence_count, wait_all, timeout_ns,
+status, first);
+
+   return r;
+}
+
 int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem)
 {
struct amdgpu_semaphore *gpu_semaphore;

if (NULL == sem)
return -EINVAL;

gpu_semaphore = calloc(1, sizeof(struct amdgpu_semaphore));
if (NULL == gpu_semaphore)
return -ENOMEM;






--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH libdrm 1/2] amdgpu: add the interface of waiting multiple fences

2017-04-18 Thread Edward O'Callaghan


On 04/19/2017 04:28 AM, Nicolai Hähnle wrote:
> On 18.04.2017 17:47, Edward O'Callaghan wrote:
>> On 04/14/2017 12:47 AM, Nicolai Hähnle wrote:
>>> From: Nicolai Hähnle 
>>>
>>> Signed-off-by: Junwei Zhang 
>>> [v2: allow returning the first signaled fence index]
>>> Signed-off-by: monk.liu 
>>> [v3:
>>>  - cleanup *status setting
>>>  - fix amdgpu symbols check]
>>> Signed-off-by: Nicolai Hähnle 
>>> Reviewed-by: Christian König  (v1)
>>> Reviewed-by: Jammy Zhou  (v1)
>>> ---
>>>  amdgpu/amdgpu-symbol-check |  1 +
>>>  amdgpu/amdgpu.h| 23 ++
>>>  amdgpu/amdgpu_cs.c | 74
>>> ++
>>>  3 files changed, 98 insertions(+)
>>>
> [snip]
>>> +static int amdgpu_ioctl_wait_fences(struct amdgpu_cs_fence *fences,
>>> +uint32_t fence_count,
>>> +bool wait_all,
>>> +uint64_t timeout_ns,
>>> +uint32_t *status,
>>> +uint32_t *first)
>>> +{
>>> +struct drm_amdgpu_fence *drm_fences;
>>> +amdgpu_device_handle dev = fences[0].context->dev;
>>> +union drm_amdgpu_wait_fences args;
>>> +int r;
>>> +uint32_t i;
>>> +
>>> +drm_fences = alloca(sizeof(struct drm_amdgpu_fence) * fence_count);
>>> +for (i = 0; i < fence_count; i++) {
>>> +drm_fences[i].ctx_id = fences[i].context->id;
>>> +drm_fences[i].ip_type = fences[i].ip_type;
>>> +drm_fences[i].ip_instance = fences[i].ip_instance;
>>> +drm_fences[i].ring = fences[i].ring;
>>> +drm_fences[i].seq_no = fences[i].fence;
>>> +}
>>> +
>>> +memset(&args, 0, sizeof(args));
>>> +args.in.fences = (uint64_t)(uintptr_t)drm_fences;
>>> +args.in.fence_count = fence_count;
>>> +args.in.wait_all = wait_all;
>>> +args.in.timeout_ns = amdgpu_cs_calculate_timeout(timeout_ns);
>>> +
>>> +r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_FENCES, &args);
>>> +if (r)
>>> +return -errno;
>>
>> Hi Nicolai,
>>
>> you will leak drm_fences here on the error branch.
> 
> It's an alloca, so it's automatically freed when the function returns.
> 
> 
>>> +
>>> +*status = args.out.status;
>>> +
>>> +if (first)
>>> +*first = args.out.first_signaled;
>>> +
>>> +return 0;
>>> +}
>>> +
>>> +int amdgpu_cs_wait_fences(struct amdgpu_cs_fence *fences,
>>> +  uint32_t fence_count,
>>> +  bool wait_all,
>>> +  uint64_t timeout_ns,
>>> +  uint32_t *status,
>>> +  uint32_t *first)
>>> +{
>>> +uint32_t i;
>>> +int r;
>>
>> no need for an intermediate ret, just return amdgpu_ioctl_wait_fences()
>> directly?
> 
> Good point, I'll change that before I push.
> 
> 
>>> +
>>> +/* Sanity check */
>>> +if (NULL == fences)
>>> +return -EINVAL;
>>> +if (NULL == status)
>>> +return -EINVAL;
>>> +if (fence_count <= 0)
>>> +return -EINVAL;
>>
>> may as well combine these branches?
>>
>> if (!fences || !status || !fence_count)
>> return -EINVAL;
>>
>> as fence_count is unsigned.
> 
> Yeah, that makes some sense, but I decided to keep the separate
> if-statements because other functions are written like this as well.

It's not completely consistent, actually; I sent in a patch last night to
fix the rest.

Cheers,
Edward.

> 
> Thanks,
> Nicolai
> 
> 
> 
>>
>> Kind Regards,
>> Edward.
>>
>>> +for (i = 0; i < fence_count; i++) {
>>> +if (NULL == fences[i].context)
>>> +return -EINVAL;
>>> +if (fences[i].ip_type >= AMDGPU_HW_IP_NUM)
>>> +return -EINVAL;
>>> +if (fences[i].ring >= AMDGPU_CS_MAX_RINGS)
>>> +return -EINVAL;
>>> +}
>>> +
>>> +*status = 0;
>>> +
>>> +r = amdgpu_ioctl_wait_fences(fences, fence_count, wait_all,
>>> timeout_ns,
>>> + status, first);
>>> +
>>> +return r;
>>> +}
>>> +
>>>  int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem)
>>>  {
>>>  struct amdgpu_semaphore *gpu_semaphore;
>>>
>>>  if (NULL == sem)
>>>  return -EINVAL;
>>>
>>>  gpu_semaphore = calloc(1, sizeof(struct amdgpu_semaphore));
>>>  if (NULL == gpu_semaphore)
>>>  return -ENOMEM;
>>>
>>
> 
> 



signature.asc
Description: OpenPGP digital signature
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx