Re: [PATCH 3/3] drm/v3d: add multiple syncobjs support
On 09/16, Iago Toral wrote: > I think this looks good overall, I have a few questions/commments > below: > > On Wed, 2021-08-18 at 18:57 +0100, Melissa Wen wrote: > > Using the generic extension support set in the previous patch, this > > patch enables more than one in/out binary syncobj per job submission. > > Arrays of syncobjs are set in a specific extension type (multisync) > > that also cares of determining the stage for sync (bin/render) > > through a flag - when this is the case. > > > > Signed-off-by: Melissa Wen > > --- > > drivers/gpu/drm/v3d/v3d_drv.c | 3 + > > drivers/gpu/drm/v3d/v3d_drv.h | 14 +++ > > drivers/gpu/drm/v3d/v3d_gem.c | 209 +++- > > -- > > include/uapi/drm/v3d_drm.h| 38 +++ > > 4 files changed, 226 insertions(+), 38 deletions(-) > > > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c > > b/drivers/gpu/drm/v3d/v3d_drv.c > > index 6a0516160bb2..939ca8c833f5 100644 > > --- a/drivers/gpu/drm/v3d/v3d_drv.c > > +++ b/drivers/gpu/drm/v3d/v3d_drv.c > > @@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device > > *dev, void *data, > > case DRM_V3D_PARAM_SUPPORTS_PERFMON: > > args->value = (v3d->ver >= 40); > > return 0; > > + case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: > > + args->value = 1; > > + return 0; > > default: > > DRM_DEBUG("Unknown parameter %d\n", args->param); > > return -EINVAL; > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h > > b/drivers/gpu/drm/v3d/v3d_drv.h > > index b900a050d5e2..544c60404a0f 100644 > > --- a/drivers/gpu/drm/v3d/v3d_drv.h > > +++ b/drivers/gpu/drm/v3d/v3d_drv.h > > @@ -294,6 +294,20 @@ struct v3d_csd_job { > > struct drm_v3d_submit_csd args; > > }; > > > > +struct v3d_submit_outsync { > > + struct drm_syncobj *syncobj; > > +}; > > + > > +struct v3d_submit_ext { > > + u32 flags; > > + > > + u32 in_sync_count; > > + u64 in_syncs; > > + > > + u32 out_sync_count; > > + struct v3d_submit_outsync *out_syncs; > > +}; > > + > > /** > > * __wait_for - magic wait macro > > * > > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c > > b/drivers/gpu/drm/v3d/v3d_gem.c > > index e254919b6c5e..e7aabe1a0e11 100644 > > --- a/drivers/gpu/drm/v3d/v3d_gem.c > > +++ b/drivers/gpu/drm/v3d/v3d_gem.c > > @@ -392,6 +392,9 @@ v3d_render_job_free(struct kref *ref) > > > > void v3d_job_cleanup(struct v3d_job *job) > > { > > + if (!job) > > + return; > > + > > drm_sched_job_cleanup(>base); > > v3d_job_put(job); > > } > > @@ -451,10 +454,11 @@ v3d_job_add_deps(struct drm_file *file_priv, > > struct v3d_job *job, > > static int > > v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, > > struct v3d_job *job, void (*free)(struct kref *ref), > > -u32 in_sync, enum v3d_queue queue) > > +u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue > > queue) > > { > > struct v3d_file_priv *v3d_priv = file_priv->driver_priv; > > - int ret; > > + bool has_multinsync = (se && se->in_sync_count); > > + int ret, i; > > > > job->v3d = v3d; > > job->free = free; > > @@ -463,14 +467,30 @@ v3d_job_init(struct v3d_dev *v3d, struct > > drm_file *file_priv, > > if (ret < 0) > > return ret; > > > > - ret = drm_sched_job_init(>base, _priv- > > >sched_entity[queue], > > -v3d_priv); > > + ret = drm_sched_job_init(>base, _priv- > > >sched_entity[queue], v3d_priv); > > if (ret) > > goto fail; > > > > - ret = v3d_job_add_deps(file_priv, job, in_sync, 0); > > - if (ret) > > - goto fail_job; > > + if (has_multinsync && (se->flags == (queue == V3D_RENDER))) { > > I think this is unnecessarily difficult to understand, I'd suggest to > code the condition more explicitly: > > if (has_multisync && > (see->flags & DRM_V3D_IN_SYNC_RCL) && > queue == V3D_RENDER) > > unless I am missing the point here :) more or less it.. the condition should be true if (IN_SYNC_RCL flag and queue is V3_RENDER) and also if (no IN_SYNC_RCL and queue is not V3_RENDER). anyway, you are right it needs to be improved and more understable, I will work on it for the next version. > > > + struct drm_v3d_sem __user *handle = u64_to_user_ptr(se- > > >in_syncs); > > + > > + for (i = 0; i < se->in_sync_count; i++) { > > + struct drm_v3d_sem in; > > + > > + ret = copy_from_user(, handle++, sizeof( > > in)); > > + if (ret) { > > + DRM_DEBUG("Failed to copy wait dep > > handle.\n"); > > + goto fail_job; > > + } > > + ret = v3d_job_add_deps(file_priv, job, > > in.handle, 0); > > + if (ret) > > + goto fail_job; > > + } > > + } else if (!has_multinsync) { > > + ret = v3d_job_add_deps(file_priv, job, in_sync, 0);
Re: [PATCH 3/3] drm/v3d: add multiple syncobjs support
I think this looks good overall, I have a few questions/commments below: On Wed, 2021-08-18 at 18:57 +0100, Melissa Wen wrote: > Using the generic extension support set in the previous patch, this > patch enables more than one in/out binary syncobj per job submission. > Arrays of syncobjs are set in a specific extension type (multisync) > that also cares of determining the stage for sync (bin/render) > through a flag - when this is the case. > > Signed-off-by: Melissa Wen > --- > drivers/gpu/drm/v3d/v3d_drv.c | 3 + > drivers/gpu/drm/v3d/v3d_drv.h | 14 +++ > drivers/gpu/drm/v3d/v3d_gem.c | 209 +++- > -- > include/uapi/drm/v3d_drm.h| 38 +++ > 4 files changed, 226 insertions(+), 38 deletions(-) > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c > b/drivers/gpu/drm/v3d/v3d_drv.c > index 6a0516160bb2..939ca8c833f5 100644 > --- a/drivers/gpu/drm/v3d/v3d_drv.c > +++ b/drivers/gpu/drm/v3d/v3d_drv.c > @@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device > *dev, void *data, > case DRM_V3D_PARAM_SUPPORTS_PERFMON: > args->value = (v3d->ver >= 40); > return 0; > + case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: > + args->value = 1; > + return 0; > default: > DRM_DEBUG("Unknown parameter %d\n", args->param); > return -EINVAL; > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h > b/drivers/gpu/drm/v3d/v3d_drv.h > index b900a050d5e2..544c60404a0f 100644 > --- a/drivers/gpu/drm/v3d/v3d_drv.h > +++ b/drivers/gpu/drm/v3d/v3d_drv.h > @@ -294,6 +294,20 @@ struct v3d_csd_job { > struct drm_v3d_submit_csd args; > }; > > +struct v3d_submit_outsync { > + struct drm_syncobj *syncobj; > +}; > + > +struct v3d_submit_ext { > + u32 flags; > + > + u32 in_sync_count; > + u64 in_syncs; > + > + u32 out_sync_count; > + struct v3d_submit_outsync *out_syncs; > +}; > + > /** > * __wait_for - magic wait macro > * > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c > b/drivers/gpu/drm/v3d/v3d_gem.c > index e254919b6c5e..e7aabe1a0e11 100644 > --- a/drivers/gpu/drm/v3d/v3d_gem.c > +++ b/drivers/gpu/drm/v3d/v3d_gem.c > @@ -392,6 +392,9 @@ v3d_render_job_free(struct kref *ref) > > void v3d_job_cleanup(struct v3d_job *job) > { > + if (!job) > + return; > + > drm_sched_job_cleanup(>base); > v3d_job_put(job); > } > @@ -451,10 +454,11 @@ v3d_job_add_deps(struct drm_file *file_priv, > struct v3d_job *job, > static int > v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, >struct v3d_job *job, void (*free)(struct kref *ref), > - u32 in_sync, enum v3d_queue queue) > + u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue > queue) > { > struct v3d_file_priv *v3d_priv = file_priv->driver_priv; > - int ret; > + bool has_multinsync = (se && se->in_sync_count); > + int ret, i; > > job->v3d = v3d; > job->free = free; > @@ -463,14 +467,30 @@ v3d_job_init(struct v3d_dev *v3d, struct > drm_file *file_priv, > if (ret < 0) > return ret; > > - ret = drm_sched_job_init(>base, _priv- > >sched_entity[queue], > - v3d_priv); > + ret = drm_sched_job_init(>base, _priv- > >sched_entity[queue], v3d_priv); > if (ret) > goto fail; > > - ret = v3d_job_add_deps(file_priv, job, in_sync, 0); > - if (ret) > - goto fail_job; > + if (has_multinsync && (se->flags == (queue == V3D_RENDER))) { I think this is unnecessarily difficult to understand, I'd suggest to code the condition more explicitly: if (has_multisync && (see->flags & DRM_V3D_IN_SYNC_RCL) && queue == V3D_RENDER) unless I am missing the point here :) > + struct drm_v3d_sem __user *handle = u64_to_user_ptr(se- > >in_syncs); > + > + for (i = 0; i < se->in_sync_count; i++) { > + struct drm_v3d_sem in; > + > + ret = copy_from_user(, handle++, sizeof( > in)); > + if (ret) { > + DRM_DEBUG("Failed to copy wait dep > handle.\n"); > + goto fail_job; > + } > + ret = v3d_job_add_deps(file_priv, job, > in.handle, 0); > + if (ret) > + goto fail_job; > + } > + } else if (!has_multinsync) { > + ret = v3d_job_add_deps(file_priv, job, in_sync, 0); > + if (ret) > + goto fail_job; > + } > > kref_init(>refcount); > > @@ -500,6 +520,7 @@ v3d_attach_fences_and_unlock_reservation(struct > drm_file *file_priv, >struct v3d_job *job, >struct ww_acquire_ctx > *acquire_ctx, >u32 out_sync, > + struct
[PATCH 3/3] drm/v3d: add multiple syncobjs support
Using the generic extension support set in the previous patch, this patch enables more than one in/out binary syncobj per job submission. Arrays of syncobjs are set in a specific extension type (multisync) that also cares of determining the stage for sync (bin/render) through a flag - when this is the case. Signed-off-by: Melissa Wen --- drivers/gpu/drm/v3d/v3d_drv.c | 3 + drivers/gpu/drm/v3d/v3d_drv.h | 14 +++ drivers/gpu/drm/v3d/v3d_gem.c | 209 +++--- include/uapi/drm/v3d_drm.h| 38 +++ 4 files changed, 226 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index 6a0516160bb2..939ca8c833f5 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, case DRM_V3D_PARAM_SUPPORTS_PERFMON: args->value = (v3d->ver >= 40); return 0; + case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: + args->value = 1; + return 0; default: DRM_DEBUG("Unknown parameter %d\n", args->param); return -EINVAL; diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index b900a050d5e2..544c60404a0f 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -294,6 +294,20 @@ struct v3d_csd_job { struct drm_v3d_submit_csd args; }; +struct v3d_submit_outsync { + struct drm_syncobj *syncobj; +}; + +struct v3d_submit_ext { + u32 flags; + + u32 in_sync_count; + u64 in_syncs; + + u32 out_sync_count; + struct v3d_submit_outsync *out_syncs; +}; + /** * __wait_for - magic wait macro * diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index e254919b6c5e..e7aabe1a0e11 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -392,6 +392,9 @@ v3d_render_job_free(struct kref *ref) void v3d_job_cleanup(struct v3d_job *job) { + if (!job) + return; + drm_sched_job_cleanup(>base); v3d_job_put(job); } @@ -451,10 +454,11 @@ v3d_job_add_deps(struct drm_file *file_priv, struct v3d_job *job, static int v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, struct v3d_job *job, void (*free)(struct kref *ref), -u32 in_sync, enum v3d_queue queue) +u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue) { struct v3d_file_priv *v3d_priv = file_priv->driver_priv; - int ret; + bool has_multinsync = (se && se->in_sync_count); + int ret, i; job->v3d = v3d; job->free = free; @@ -463,14 +467,30 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, if (ret < 0) return ret; - ret = drm_sched_job_init(>base, _priv->sched_entity[queue], -v3d_priv); + ret = drm_sched_job_init(>base, _priv->sched_entity[queue], v3d_priv); if (ret) goto fail; - ret = v3d_job_add_deps(file_priv, job, in_sync, 0); - if (ret) - goto fail_job; + if (has_multinsync && (se->flags == (queue == V3D_RENDER))) { + struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs); + + for (i = 0; i < se->in_sync_count; i++) { + struct drm_v3d_sem in; + + ret = copy_from_user(, handle++, sizeof(in)); + if (ret) { + DRM_DEBUG("Failed to copy wait dep handle.\n"); + goto fail_job; + } + ret = v3d_job_add_deps(file_priv, job, in.handle, 0); + if (ret) + goto fail_job; + } + } else if (!has_multinsync) { + ret = v3d_job_add_deps(file_priv, job, in_sync, 0); + if (ret) + goto fail_job; + } kref_init(>refcount); @@ -500,6 +520,7 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, struct v3d_job *job, struct ww_acquire_ctx *acquire_ctx, u32 out_sync, +struct v3d_submit_ext *se, struct dma_fence *done_fence) { struct drm_syncobj *sync_out; @@ -514,6 +535,18 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); /* Update the return sync object for the job */ + /* If multiples semaphores is supported */ + if (se && se->out_sync_count) { + for (i = 0; i < se->out_sync_count; i++) { +
[RFC PATCH 3/3] drm/v3d: add multiple syncobjs support
Using the generic extension support set in the previous patch, this patch enables more than one in/out binary syncobj per job submission. Arrays of syncobjs are set in a specific extension type (multisync) that also cares of determining the stage for sync (bin/render) through a flag - when this is the case. Signed-off-by: Melissa Wen --- drivers/gpu/drm/v3d/v3d_drv.c | 3 + drivers/gpu/drm/v3d/v3d_drv.h | 14 +++ drivers/gpu/drm/v3d/v3d_gem.c | 210 -- include/uapi/drm/v3d_drm.h| 38 ++ 4 files changed, 231 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index 6a0516160bb2..939ca8c833f5 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -96,6 +96,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, case DRM_V3D_PARAM_SUPPORTS_PERFMON: args->value = (v3d->ver >= 40); return 0; + case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: + args->value = 1; + return 0; default: DRM_DEBUG("Unknown parameter %d\n", args->param); return -EINVAL; diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 270134779073..a4fdd7539691 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -299,6 +299,20 @@ struct v3d_csd_job { struct drm_v3d_submit_csd args; }; +struct v3d_submit_outsync { + struct drm_syncobj *syncobj; +}; + +struct v3d_submit_ext { + u32 flags; + + u32 in_sync_count; + u64 in_syncs; + + u32 out_sync_count; + struct v3d_submit_outsync *out_syncs; +}; + /** * __wait_for - magic wait macro * diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 22f3ef9f52d2..52ec52f6340e 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -452,9 +452,12 @@ v3d_job_add_deps(struct drm_file *file_priv, struct v3d_job *job, static int v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, struct v3d_job *job, void (*free)(struct kref *ref), -u32 in_sync) +u32 in_sync, struct v3d_submit_ext *se, u32 rcl_sync_flag) { - int ret; + int ret, i; + struct drm_v3d_sem __user *handle = NULL; + struct dma_fence *in_fence = NULL; + unsigned long index; job->v3d = v3d; job->free = free; @@ -465,14 +468,34 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, xa_init_flags(>deps, XA_FLAGS_ALLOC); - ret = v3d_job_add_deps(file_priv, job, in_sync, 0); - if (ret) - goto fail; + if (!(se && se->in_sync_count)) { + ret = v3d_job_add_deps(file_priv, job, in_sync, 0); + if (ret) + goto fail; + } else if (se && se->in_sync_count && (se->flags & DRM_V3D_IN_SYNC_RCL) == rcl_sync_flag) { + handle = u64_to_user_ptr(se->in_syncs); + + for (i = 0; i < se->in_sync_count; i++) { + struct drm_v3d_sem in; + + ret = copy_from_user(, handle++, sizeof(in)); + if (ret) { + DRM_DEBUG("Failed to copy wait dep handle.\n"); + goto fail; + } + + ret = v3d_job_add_deps(file_priv, job, in.handle, 0); + if (ret) + goto fail; + } + } kref_init(>refcount); return 0; fail: + xa_for_each(>deps, index, in_fence) + dma_fence_put(in_fence); xa_destroy(>deps); pm_runtime_put_autosuspend(v3d->drm.dev); return ret; @@ -504,6 +527,7 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, struct v3d_job *job, struct ww_acquire_ctx *acquire_ctx, u32 out_sync, +struct v3d_submit_ext *se, struct dma_fence *done_fence) { struct drm_syncobj *sync_out; @@ -518,6 +542,18 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); /* Update the return sync object for the job */ + /* If multiples semaphores is supported */ + if (se && se->out_sync_count) { + for (i = 0; i < se->out_sync_count; i++) { + drm_syncobj_replace_fence(se->out_syncs[i].syncobj, + done_fence); + drm_syncobj_put(se->out_syncs[i].syncobj); + } + kvfree(se->out_syncs); + return; + } + +