On Wed, Aug 9, 2017 at 11:25 AM, Christian König <deathsim...@vodafone.de> wrote:
> Am 09.08.2017 um 19:57 schrieb Chris Wilson:
>> Quoting Jason Ekstrand (2017-08-09 18:00:54)
>>> Vulkan VkFence semantics require that the application be able to perform
>>> a CPU wait on work which may not yet have been submitted.  This is
>>> perfectly safe because the CPU wait has a timeout which will get
>>> triggered eventually if no work is ever submitted.  This behavior is
>>> advantageous for multi-threaded workloads because, so long as all of the
>>> threads agree on what fences to use up-front, you don't have the extra
>>> cross-thread synchronization cost of thread A telling thread B that it
>>> has submitted its dependent work and thread B is now free to wait.
>>>
>>> Within a single process, this can be implemented in the userspace driver
>>> by doing exactly the same kind of tracking the app would have to do
>>> using posix condition variables or similar.  However, in order for this
>>> to work cross-process (as is required by VK_KHR_external_fence), we need
>>> to handle this in the kernel.
>>>
>>> This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
>>> instructs the IOCTL to wait for the syncobj to have a non-null fence and
>>> then wait on the fence.  Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
>>> easily get the Vulkan behavior.
>>>
>>> v2:
>>>  - Fix a bug in the invalid syncobj error path
>>>  - Unify the wait-all and wait-any cases
>>>
>>> Signed-off-by: Jason Ekstrand <ja...@jlekstrand.net>
>>> Cc: Dave Airlie <airl...@redhat.com>
>>> ---
>>>
>>> I realized today (this is what happens when you sleep) that it takes almost
>>> no work to make the wait-any path also handle wait-all.  By unifying the
>>> two, I deleted over a hundred lines of code from the implementation.
>>>
>>>  drivers/gpu/drm/drm_syncobj.c | 243 ++++++++++++++++++++++++++++++++++--------
>>>  include/uapi/drm/drm.h        |   1 +
>>>  2 files changed, 199 insertions(+), 45 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>>> index 510dfc2..5e7f654 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -51,6 +51,7 @@
>>>  #include <linux/fs.h>
>>>  #include <linux/anon_inodes.h>
>>>  #include <linux/sync_file.h>
>>> +#include <linux/sched/signal.h>
>>>  #include "drm_internal.h"
>>>  #include <drm/drm_syncobj.h>
>>> @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
>>
>> This is a bit of the puzzle that is missing.
>

? It's in the previous patch.
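For context, the userspace contract described above looks roughly like this
from the driver side.  This is a hypothetical helper, not part of the
series; the struct drm_syncobj_wait layout and the absolute CLOCK_MONOTONIC
timeout are assumptions based on the proposed uapi, so treat it as
illustrative only:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <xf86drm.h>

static int vk_wait_for_syncobjs(int fd, const uint32_t *handles,
                                uint32_t count, bool wait_all,
                                int64_t timeout_ns)
{
        struct drm_syncobj_wait wait;
        struct timespec now;

        clock_gettime(CLOCK_MONOTONIC, &now);

        memset(&wait, 0, sizeof(wait));
        wait.handles = (uintptr_t)handles;
        wait.count_handles = count;
        /* timeout_nsec is an absolute CLOCK_MONOTONIC time point */
        wait.timeout_nsec = (int64_t)now.tv_sec * 1000000000 +
                            now.tv_nsec + timeout_ns;
        /* Without WAIT_FOR_SUBMIT a fence-less syncobj fails with
         * -EINVAL; with it, the wait blocks until a fence shows up. */
        wait.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
        if (wait_all)
                wait.flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

        return drmIoctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
}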
>>>  	list_add_tail(&cb->node, &syncobj->cb_list);
>>>  }
>>> +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
>>> +						 struct dma_fence **fence,
>>> +						 struct drm_syncobj_cb *cb,
>>> +						 drm_syncobj_func_t func)
>>> +{
>>> +	int ret;
>>> +
>>> +	spin_lock(&syncobj->lock);
>>> +	if (syncobj->fence) {
>>> +		*fence = dma_fence_get(syncobj->fence);
>>> +		ret = 1;
>>> +	} else {
>>> +		*fence = NULL;
>>> +		drm_syncobj_add_callback_locked(syncobj, cb, func);
>>> +		ret = 0;
>>> +	}
>>> +	spin_unlock(&syncobj->lock);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>>  /**
>>>   * drm_syncobj_add_callback - adds a callback to syncobj::cb_list
>>>   * @syncobj: Sync object to which to add the callback
>>> @@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>>  				    &args->handle);
>>>  }
>>> +static int drm_syncobj_signaled(struct drm_syncobj *syncobj,
>>> +				uint32_t wait_flags)
>>> +{
>>> +	struct dma_fence *fence;
>>> +	int ret;
>>> +
>>> +	fence = drm_syncobj_fence_get(syncobj);
>>> +	if (!fence) {
>>> +		if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>>> +			return 0;
>>> +		else
>>> +			return -EINVAL;
>>> +	}
>>> +
>>> +	ret = dma_fence_is_signaled(fence) ? 1 : 0;
>>> +
>>> +	dma_fence_put(fence);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +struct syncobj_wait_entry {
>>> +	struct task_struct *task;
>>> +	struct dma_fence *fence;
>>> +	struct dma_fence_cb fence_cb;
>>> +	struct drm_syncobj_cb syncobj_cb;
>>> +};
>>> +
>>> +static void syncobj_wait_fence_func(struct dma_fence *fence,
>>> +				    struct dma_fence_cb *cb)
>>> +{
>>> +	struct syncobj_wait_entry *wait =
>>> +		container_of(cb, struct syncobj_wait_entry, fence_cb);
>>> +
>>> +	wake_up_process(wait->task);
>>> +}
>>> +
>>> +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
>>> +				      struct drm_syncobj_cb *cb)
>>> +{
>>> +	struct syncobj_wait_entry *wait =
>>> +		container_of(cb, struct syncobj_wait_entry, syncobj_cb);
>>> +
>>> +	/* This happens inside the syncobj lock */
>>> +	wait->fence = dma_fence_get(syncobj->fence);
>>> +	wake_up_process(wait->task);
>>> +}
>>> +
>>> +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>> +						  uint32_t count,
>>> +						  uint32_t flags,
>>> +						  signed long timeout,
>>> +						  uint32_t *idx)
>>> +{
>>> +	struct syncobj_wait_entry *entries;
>>> +	struct dma_fence *fence;
>>> +	signed long ret;
>>> +	uint32_t signaled_count, i;
>>> +
>>> +	if (timeout == 0) {
>>> +		signaled_count = 0;
>>> +		for (i = 0; i < count; ++i) {
>>> +			ret = drm_syncobj_signaled(syncobjs[i], flags);
>>> +			if (ret < 0)
>>> +				return ret;
>>> +			if (ret == 0)
>>> +				continue;
>>> +			if (signaled_count == 0 && idx)
>>> +				*idx = i;
>>> +			signaled_count++;
>>> +		}
>>> +
>>> +		if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
>>> +			return signaled_count == count ? 1 : 0;
>>> +		else
>>> +			return signaled_count > 0 ? 1 : 0;
>>> +	}
>>> +
>>> +	entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
>>> +	if (!entries)
>>> +		return -ENOMEM;
>>> +
>>> +	for (i = 0; i < count; ++i) {
>>> +		entries[i].task = current;
>>> +
>>> +		if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>> +			drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>>> +							      &entries[i].fence,
>>> +							      &entries[i].syncobj_cb,
>>> +							      syncobj_wait_syncobj_func);
>>> +		} else {
>>> +			entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
>>> +			if (!entries[i].fence) {
>>> +				ret = -EINVAL;
>>> +				goto err_cleanup_entries;
>>> +			}
>>
>> So we are taking a snapshot here.  It looks like this could have been
>> done using a dma_fence_array + dma_fence_proxy for capturing the future
>> fence.
>

I'm not sure what you mean.  Is that something in the future fence series
that I should be looking at?
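If dma_fence_array (merged in v4.9) is the first half of what you mean, I
can see how that part works: it collapses N fences into a single dma_fence
with wait-all or wait-any semantics.  A minimal sketch below, with the
caveat that the proxy standing in for a not-yet-submitted future fence has
no upstream implementation, so only already-present fences are covered:

#include <linux/dma-fence-array.h>

static struct dma_fence *collapse_to_one_fence(struct dma_fence **fences,
					       int count, bool wait_any)
{
	struct dma_fence_array *array;

	/* dma_fence_array_create() takes ownership of the kmalloc'd
	 * @fences array and of one reference to each fence in it. */
	array = dma_fence_array_create(count, fences,
				       dma_fence_context_alloc(1), 1,
				       wait_any);
	if (!array)
		return NULL;

	/* A single dma_fence_wait_timeout() on &array->base now waits
	 * for all fences (wait_any=false) or for the first one (true). */
	return &array->base;
}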
>>> +	ret = timeout;
>>> +	while (ret > 0) {
>>> +		signaled_count = 0;
>>> +		for (i = 0; i < count; ++i) {
>>> +			fence = entries[i].fence;
>>> +			if (!fence)
>>> +				continue;
>>> +
>>> +			if (dma_fence_is_signaled(fence) ||
>>> +			    (!entries[i].fence_cb.func &&
>>> +			     dma_fence_add_callback(fence,
>>> +						    &entries[i].fence_cb,
>>> +						    syncobj_wait_fence_func))) {
>>> +				/* The fence has been signaled */
>>> +				if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
>>> +					signaled_count++;
>>> +				} else {
>>> +					if (idx)
>>> +						*idx = i;
>>> +					goto done_waiting;
>>> +				}
>>> +			}
>>> +		}
>>> +
>>> +		if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) &&
>>> +		    signaled_count == count)
>>> +			goto done_waiting;
>>> +
>>> +		set_current_state(TASK_INTERRUPTIBLE);
>>
>> This is the first step in the wait-loop, you set TASK_INTERRUPTIBLE
>> before doing any checks.  So that if any state changes whilst you are in
>> the middle of those checks, the schedule_timeout is a nop and you can
>> check again.
>
> Yeah, completely agree.
>
> I would rather drop the approach with the custom wait and try to use
> wait_event_interruptible here.
>
> As far as I can see that should make the whole patch much cleaner in
> general.

Sounds good to me.

--Jason
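P.S. For my own notes, the single-syncobj shape of what Christian is
suggesting might look like the sketch below.  The wait queue head and its
wake-up wiring are hypothetical (the syncobj "fence attached" callback and
the dma-fence "signaled" callback would both just call wake_up()); handling
a whole array of syncobjs is what makes this less straightforward:

#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(syncobj_wait_queue);

static signed long drm_syncobj_wait_one(struct drm_syncobj *syncobj,
					uint32_t flags,
					signed long timeout)
{
	signed long ret;

	/* wait_event_interruptible_timeout() owns the
	 * set_current_state()/schedule_timeout() dance Chris describes:
	 * the condition is re-evaluated after every wake-up, so a state
	 * change during the checks is never missed.  drm_syncobj_signaled()
	 * (from the patch above) returns 1 once a fence exists and has
	 * signaled; with WAIT_FOR_SUBMIT it returns 0 instead of -EINVAL
	 * while no fence is attached yet. */
	ret = wait_event_interruptible_timeout(syncobj_wait_queue,
			drm_syncobj_signaled(syncobj, flags) > 0,
			timeout);

	/* Remaining jiffies on success, 0 on timeout, or -ERESTARTSYS
	 * if interrupted by a signal. */
	return ret;
}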