To allow faster engine-to-engine synchronization, peel back the
dma-fence-chain layer to expose any underlying i915 fences so that the
i915-request code can emit HW semaphore wait/signal operations in the
ring. This is faster than waking up the host to submit the unblocked
workloads after an interrupt notification.

v2: Also deal with chains where the last node is not a dma-fence-chain

Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 39 ++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index d8814e637e71..3ffd95d1dc2c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2403,6 +2403,7 @@ await_fence_array(struct i915_execbuffer *eb)
 
        for (n = 0; n < eb->n_fences; n++) {
                struct drm_syncobj *syncobj;
+               struct dma_fence_chain *chain;
                struct dma_fence *fence;
                unsigned int flags;
 
@@ -2423,7 +2424,43 @@ await_fence_array(struct i915_execbuffer *eb)
                                continue;
                }
 
-               err = i915_request_await_dma_fence(eb->request, fence);
+               chain = to_dma_fence_chain(fence);
+               if (chain) {
+                       struct dma_fence *iter;
+
+                       /*
+                        * If we're dealing with a dma-fence-chain, peel the
+                        * chain by adding all of the unsignaled fences
+                        * (dma_fence_chain_for_each does that for us) the
+                        * chain points to.
+                        *
+                        * This enables us to identify waits on i915 fences
+                        * and allows for faster engine-to-engine
+                        * synchronization using HW semaphores.
+                        */
+                       dma_fence_chain_for_each(iter, fence) {
+                               struct dma_fence_chain *iter_chain =
+                                       to_dma_fence_chain(iter);
+
+                               /*
+                                * It is possible that the last item in the
+                                * chain is not a dma_fence_chain.
+                                */
+                               if (iter_chain) {
+                                       err = i915_request_await_dma_fence(eb->request,
+                                                                          iter_chain->fence);
+                               } else {
+                                       err = i915_request_await_dma_fence(eb->request, iter);
+                               }
+                               if (err < 0) {
+                                       dma_fence_put(iter);
+                                       break;
+                               }
+                       }
+               } else {
+                       err = i915_request_await_dma_fence(eb->request, fence);
+               }
+
                dma_fence_put(fence);
                if (err < 0)
                        return err;
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to