Re: [PATCH 3/3] drm/v3d: add multiple syncobjs support

2021-09-16 Thread Melissa Wen
On 09/16, Iago Toral wrote:
> I think this looks good overall, I have a few questions/commments
> below:
> 
> On Wed, 2021-08-18 at 18:57 +0100, Melissa Wen wrote:
> > Using the generic extension support set in the previous patch, this
> > patch enables more than one in/out binary syncobj per job submission.
> > Arrays of syncobjs are set in a specific extension type (multisync)
> > that also cares of determining the stage for sync (bin/render)
> > through a flag - when this is the case.
> > 
> > Signed-off-by: Melissa Wen 
> > ---
> >  drivers/gpu/drm/v3d/v3d_drv.c |   3 +
> >  drivers/gpu/drm/v3d/v3d_drv.h |  14 +++
> >  drivers/gpu/drm/v3d/v3d_gem.c | 209 +++-
> > --
> >  include/uapi/drm/v3d_drm.h|  38 +++
> >  4 files changed, 226 insertions(+), 38 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c
> > b/drivers/gpu/drm/v3d/v3d_drv.c
> > index 6a0516160bb2..939ca8c833f5 100644
> > --- a/drivers/gpu/drm/v3d/v3d_drv.c
> > +++ b/drivers/gpu/drm/v3d/v3d_drv.c
> > @@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device
> > *dev, void *data,
> > case DRM_V3D_PARAM_SUPPORTS_PERFMON:
> > args->value = (v3d->ver >= 40);
> > return 0;
> > +   case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
> > +   args->value = 1;
> > +   return 0;
> > default:
> > DRM_DEBUG("Unknown parameter %d\n", args->param);
> > return -EINVAL;
> > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h
> > b/drivers/gpu/drm/v3d/v3d_drv.h
> > index b900a050d5e2..544c60404a0f 100644
> > --- a/drivers/gpu/drm/v3d/v3d_drv.h
> > +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> > @@ -294,6 +294,20 @@ struct v3d_csd_job {
> > struct drm_v3d_submit_csd args;
> >  };
> >  
> > +struct v3d_submit_outsync {
> > +   struct drm_syncobj *syncobj;
> > +};
> > +
> > +struct v3d_submit_ext {
> > +   u32 flags;
> > +
> > +   u32 in_sync_count;
> > +   u64 in_syncs;
> > +
> > +   u32 out_sync_count;
> > +   struct v3d_submit_outsync *out_syncs;
> > +};
> > +
> >  /**
> >   * __wait_for - magic wait macro
> >   *
> > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c
> > b/drivers/gpu/drm/v3d/v3d_gem.c
> > index e254919b6c5e..e7aabe1a0e11 100644
> > --- a/drivers/gpu/drm/v3d/v3d_gem.c
> > +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> > @@ -392,6 +392,9 @@ v3d_render_job_free(struct kref *ref)
> >  
> >  void v3d_job_cleanup(struct v3d_job *job)
> >  {
> > +   if (!job)
> > +   return;
> > +
> > drm_sched_job_cleanup(>base);
> > v3d_job_put(job);
> >  }
> > @@ -451,10 +454,11 @@ v3d_job_add_deps(struct drm_file *file_priv,
> > struct v3d_job *job,
> >  static int
> >  v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
> >  struct v3d_job *job, void (*free)(struct kref *ref),
> > -u32 in_sync, enum v3d_queue queue)
> > +u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue
> > queue)
> >  {
> > struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
> > -   int ret;
> > +   bool has_multinsync = (se && se->in_sync_count);
> > +   int ret, i;
> >  
> > job->v3d = v3d;
> > job->free = free;
> > @@ -463,14 +467,30 @@ v3d_job_init(struct v3d_dev *v3d, struct
> > drm_file *file_priv,
> > if (ret < 0)
> > return ret;
> >  
> > -   ret = drm_sched_job_init(>base, _priv-
> > >sched_entity[queue],
> > -v3d_priv);
> > +   ret = drm_sched_job_init(>base, _priv-
> > >sched_entity[queue], v3d_priv);
> > if (ret)
> > goto fail;
> >  
> > -   ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
> > -   if (ret)
> > -   goto fail_job;
> > +   if (has_multinsync && (se->flags == (queue == V3D_RENDER))) {
> 
> I think this is unnecessarily difficult to understand, I'd suggest to
> code the condition more explicitly:
> 
> if (has_multisync &&
> (see->flags & DRM_V3D_IN_SYNC_RCL) &&
> queue ==  V3D_RENDER)
> 
> unless I am missing the point here :)

more or less it.. the condition should be true if (IN_SYNC_RCL flag and
queue is V3_RENDER) and also if (no IN_SYNC_RCL and queue is not
V3_RENDER).

anyway, you are right it needs to be improved and more understable, I
will work on it for the next version.

> 
> > +   struct drm_v3d_sem __user *handle = u64_to_user_ptr(se-
> > >in_syncs);
> > +
> > +   for (i = 0; i < se->in_sync_count; i++) {
> > +   struct drm_v3d_sem in;
> > +
> > +   ret = copy_from_user(, handle++, sizeof(
> > in));
> > +   if (ret) {
> > +   DRM_DEBUG("Failed to copy wait dep
> > handle.\n");
> > +   goto fail_job;
> > +   }
> > +   ret = v3d_job_add_deps(file_priv, job,
> > in.handle, 0);
> > +   if (ret)
> > +   goto fail_job;
> > +   }
> > +   } else if (!has_multinsync) {
> > +   ret = v3d_job_add_deps(file_priv, job, in_sync, 0);

Re: [PATCH 3/3] drm/v3d: add multiple syncobjs support

2021-09-16 Thread Iago Toral
I think this looks good overall, I have a few questions/commments
below:

On Wed, 2021-08-18 at 18:57 +0100, Melissa Wen wrote:
> Using the generic extension support set in the previous patch, this
> patch enables more than one in/out binary syncobj per job submission.
> Arrays of syncobjs are set in a specific extension type (multisync)
> that also cares of determining the stage for sync (bin/render)
> through a flag - when this is the case.
> 
> Signed-off-by: Melissa Wen 
> ---
>  drivers/gpu/drm/v3d/v3d_drv.c |   3 +
>  drivers/gpu/drm/v3d/v3d_drv.h |  14 +++
>  drivers/gpu/drm/v3d/v3d_gem.c | 209 +++-
> --
>  include/uapi/drm/v3d_drm.h|  38 +++
>  4 files changed, 226 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.c
> b/drivers/gpu/drm/v3d/v3d_drv.c
> index 6a0516160bb2..939ca8c833f5 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.c
> +++ b/drivers/gpu/drm/v3d/v3d_drv.c
> @@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device
> *dev, void *data,
>   case DRM_V3D_PARAM_SUPPORTS_PERFMON:
>   args->value = (v3d->ver >= 40);
>   return 0;
> + case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
> + args->value = 1;
> + return 0;
>   default:
>   DRM_DEBUG("Unknown parameter %d\n", args->param);
>   return -EINVAL;
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.h
> b/drivers/gpu/drm/v3d/v3d_drv.h
> index b900a050d5e2..544c60404a0f 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.h
> +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> @@ -294,6 +294,20 @@ struct v3d_csd_job {
>   struct drm_v3d_submit_csd args;
>  };
>  
> +struct v3d_submit_outsync {
> + struct drm_syncobj *syncobj;
> +};
> +
> +struct v3d_submit_ext {
> + u32 flags;
> +
> + u32 in_sync_count;
> + u64 in_syncs;
> +
> + u32 out_sync_count;
> + struct v3d_submit_outsync *out_syncs;
> +};
> +
>  /**
>   * __wait_for - magic wait macro
>   *
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c
> b/drivers/gpu/drm/v3d/v3d_gem.c
> index e254919b6c5e..e7aabe1a0e11 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -392,6 +392,9 @@ v3d_render_job_free(struct kref *ref)
>  
>  void v3d_job_cleanup(struct v3d_job *job)
>  {
> + if (!job)
> + return;
> +
>   drm_sched_job_cleanup(>base);
>   v3d_job_put(job);
>  }
> @@ -451,10 +454,11 @@ v3d_job_add_deps(struct drm_file *file_priv,
> struct v3d_job *job,
>  static int
>  v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
>struct v3d_job *job, void (*free)(struct kref *ref),
> -  u32 in_sync, enum v3d_queue queue)
> +  u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue
> queue)
>  {
>   struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
> - int ret;
> + bool has_multinsync = (se && se->in_sync_count);
> + int ret, i;
>  
>   job->v3d = v3d;
>   job->free = free;
> @@ -463,14 +467,30 @@ v3d_job_init(struct v3d_dev *v3d, struct
> drm_file *file_priv,
>   if (ret < 0)
>   return ret;
>  
> - ret = drm_sched_job_init(>base, _priv-
> >sched_entity[queue],
> -  v3d_priv);
> + ret = drm_sched_job_init(>base, _priv-
> >sched_entity[queue], v3d_priv);
>   if (ret)
>   goto fail;
>  
> - ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
> - if (ret)
> - goto fail_job;
> + if (has_multinsync && (se->flags == (queue == V3D_RENDER))) {

I think this is unnecessarily difficult to understand, I'd suggest to
code the condition more explicitly:

if (has_multisync &&
(see->flags & DRM_V3D_IN_SYNC_RCL) &&
queue ==  V3D_RENDER)

unless I am missing the point here :)

> + struct drm_v3d_sem __user *handle = u64_to_user_ptr(se-
> >in_syncs);
> +
> + for (i = 0; i < se->in_sync_count; i++) {
> + struct drm_v3d_sem in;
> +
> + ret = copy_from_user(, handle++, sizeof(
> in));
> + if (ret) {
> + DRM_DEBUG("Failed to copy wait dep
> handle.\n");
> + goto fail_job;
> + }
> + ret = v3d_job_add_deps(file_priv, job,
> in.handle, 0);
> + if (ret)
> + goto fail_job;
> + }
> + } else if (!has_multinsync) {
> + ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
> + if (ret)
> + goto fail_job;
> + }
>  
>   kref_init(>refcount);
>  
> @@ -500,6 +520,7 @@ v3d_attach_fences_and_unlock_reservation(struct
> drm_file *file_priv,
>struct v3d_job *job,
>struct ww_acquire_ctx
> *acquire_ctx,
>u32 out_sync,
> +  struct 

[PATCH 3/3] drm/v3d: add multiple syncobjs support

2021-08-18 Thread Melissa Wen
Using the generic extension support set in the previous patch, this
patch enables more than one in/out binary syncobj per job submission.
Arrays of syncobjs are set in a specific extension type (multisync)
that also cares of determining the stage for sync (bin/render)
through a flag - when this is the case.

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/v3d/v3d_drv.c |   3 +
 drivers/gpu/drm/v3d/v3d_drv.h |  14 +++
 drivers/gpu/drm/v3d/v3d_gem.c | 209 +++---
 include/uapi/drm/v3d_drm.h|  38 +++
 4 files changed, 226 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index 6a0516160bb2..939ca8c833f5 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void 
*data,
case DRM_V3D_PARAM_SUPPORTS_PERFMON:
args->value = (v3d->ver >= 40);
return 0;
+   case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
+   args->value = 1;
+   return 0;
default:
DRM_DEBUG("Unknown parameter %d\n", args->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index b900a050d5e2..544c60404a0f 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -294,6 +294,20 @@ struct v3d_csd_job {
struct drm_v3d_submit_csd args;
 };
 
+struct v3d_submit_outsync {
+   struct drm_syncobj *syncobj;
+};
+
+struct v3d_submit_ext {
+   u32 flags;
+
+   u32 in_sync_count;
+   u64 in_syncs;
+
+   u32 out_sync_count;
+   struct v3d_submit_outsync *out_syncs;
+};
+
 /**
  * __wait_for - magic wait macro
  *
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index e254919b6c5e..e7aabe1a0e11 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -392,6 +392,9 @@ v3d_render_job_free(struct kref *ref)
 
 void v3d_job_cleanup(struct v3d_job *job)
 {
+   if (!job)
+   return;
+
drm_sched_job_cleanup(>base);
v3d_job_put(job);
 }
@@ -451,10 +454,11 @@ v3d_job_add_deps(struct drm_file *file_priv, struct 
v3d_job *job,
 static int
 v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 struct v3d_job *job, void (*free)(struct kref *ref),
-u32 in_sync, enum v3d_queue queue)
+u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
 {
struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
-   int ret;
+   bool has_multinsync = (se && se->in_sync_count);
+   int ret, i;
 
job->v3d = v3d;
job->free = free;
@@ -463,14 +467,30 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file 
*file_priv,
if (ret < 0)
return ret;
 
-   ret = drm_sched_job_init(>base, _priv->sched_entity[queue],
-v3d_priv);
+   ret = drm_sched_job_init(>base, _priv->sched_entity[queue], 
v3d_priv);
if (ret)
goto fail;
 
-   ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
-   if (ret)
-   goto fail_job;
+   if (has_multinsync && (se->flags == (queue == V3D_RENDER))) {
+   struct drm_v3d_sem __user *handle = 
u64_to_user_ptr(se->in_syncs);
+
+   for (i = 0; i < se->in_sync_count; i++) {
+   struct drm_v3d_sem in;
+
+   ret = copy_from_user(, handle++, sizeof(in));
+   if (ret) {
+   DRM_DEBUG("Failed to copy wait dep handle.\n");
+   goto fail_job;
+   }
+   ret = v3d_job_add_deps(file_priv, job, in.handle, 0);
+   if (ret)
+   goto fail_job;
+   }
+   } else if (!has_multinsync) {
+   ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
+   if (ret)
+   goto fail_job;
+   }
 
kref_init(>refcount);
 
@@ -500,6 +520,7 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file 
*file_priv,
 struct v3d_job *job,
 struct ww_acquire_ctx *acquire_ctx,
 u32 out_sync,
+struct v3d_submit_ext *se,
 struct dma_fence *done_fence)
 {
struct drm_syncobj *sync_out;
@@ -514,6 +535,18 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file 
*file_priv,
drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
 
/* Update the return sync object for the job */
+   /* If multiples semaphores is supported */
+   if (se && se->out_sync_count) {
+   for (i = 0; i < se->out_sync_count; i++) {
+

[RFC PATCH 3/3] drm/v3d: add multiple syncobjs support

2021-08-04 Thread Melissa Wen
Using the generic extension support set in the previous patch, this
patch enables more than one in/out binary syncobj per job submission.
Arrays of syncobjs are set in a specific extension type (multisync)
that also cares of determining the stage for sync (bin/render)
through a flag - when this is the case.

Signed-off-by: Melissa Wen 
---
 drivers/gpu/drm/v3d/v3d_drv.c |   3 +
 drivers/gpu/drm/v3d/v3d_drv.h |  14 +++
 drivers/gpu/drm/v3d/v3d_gem.c | 210 --
 include/uapi/drm/v3d_drm.h|  38 ++
 4 files changed, 231 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index 6a0516160bb2..939ca8c833f5 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void 
*data,
case DRM_V3D_PARAM_SUPPORTS_PERFMON:
args->value = (v3d->ver >= 40);
return 0;
+   case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
+   args->value = 1;
+   return 0;
default:
DRM_DEBUG("Unknown parameter %d\n", args->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 270134779073..a4fdd7539691 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -299,6 +299,20 @@ struct v3d_csd_job {
struct drm_v3d_submit_csd args;
 };
 
+struct v3d_submit_outsync {
+   struct drm_syncobj *syncobj;
+};
+
+struct v3d_submit_ext {
+   u32 flags;
+
+   u32 in_sync_count;
+   u64 in_syncs;
+
+   u32 out_sync_count;
+   struct v3d_submit_outsync *out_syncs;
+};
+
 /**
  * __wait_for - magic wait macro
  *
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 22f3ef9f52d2..52ec52f6340e 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -452,9 +452,12 @@ v3d_job_add_deps(struct drm_file *file_priv, struct 
v3d_job *job,
 static int
 v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 struct v3d_job *job, void (*free)(struct kref *ref),
-u32 in_sync)
+u32 in_sync, struct v3d_submit_ext *se, u32 rcl_sync_flag)
 {
-   int ret;
+   int ret, i;
+   struct drm_v3d_sem __user *handle = NULL;
+   struct dma_fence *in_fence = NULL;
+   unsigned long index;
 
job->v3d = v3d;
job->free = free;
@@ -465,14 +468,34 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file 
*file_priv,
 
xa_init_flags(>deps, XA_FLAGS_ALLOC);
 
-   ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
-   if (ret)
-   goto fail;
+   if (!(se && se->in_sync_count)) {
+   ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
+   if (ret)
+   goto fail;
+   } else if (se && se->in_sync_count && (se->flags & DRM_V3D_IN_SYNC_RCL) 
== rcl_sync_flag) {
+   handle = u64_to_user_ptr(se->in_syncs);
+
+   for (i = 0; i < se->in_sync_count; i++) {
+   struct drm_v3d_sem in;
+
+   ret = copy_from_user(, handle++, sizeof(in));
+   if (ret) {
+   DRM_DEBUG("Failed to copy wait dep handle.\n");
+   goto fail;
+   }
+
+   ret = v3d_job_add_deps(file_priv, job, in.handle, 0);
+   if (ret)
+   goto fail;
+   }
+   }
 
kref_init(>refcount);
 
return 0;
 fail:
+   xa_for_each(>deps, index, in_fence)
+   dma_fence_put(in_fence);
xa_destroy(>deps);
pm_runtime_put_autosuspend(v3d->drm.dev);
return ret;
@@ -504,6 +527,7 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file 
*file_priv,
 struct v3d_job *job,
 struct ww_acquire_ctx *acquire_ctx,
 u32 out_sync,
+struct v3d_submit_ext *se,
 struct dma_fence *done_fence)
 {
struct drm_syncobj *sync_out;
@@ -518,6 +542,18 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file 
*file_priv,
drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
 
/* Update the return sync object for the job */
+   /* If multiples semaphores is supported */
+   if (se && se->out_sync_count) {
+   for (i = 0; i < se->out_sync_count; i++) {
+   drm_syncobj_replace_fence(se->out_syncs[i].syncobj,
+ done_fence);
+   drm_syncobj_put(se->out_syncs[i].syncobj);
+   }
+   kvfree(se->out_syncs);
+   return;
+   }
+
+