Re: [Intel-gfx] [RFC 9/9] drm/i915: Add sync framework support to execbuff IOCTL
On Wed, Oct 28, 2015 at 01:01:17PM +, John Harrison wrote: > On 27/07/2015 14:00, Tvrtko Ursulin wrote: > >On 07/17/2015 03:31 PM, john.c.harri...@intel.com wrote: > >>From: John Harrison > >> > >>Various projects desire a mechanism for managing dependencies between > >>work items asynchronously. This can also include work items across > >>complete different and independent systems. For example, an > >>application wants to retreive a frame from a video in device, > >>using it for rendering on a GPU then send it to the video out device > >>for display all without having to stall waiting for completion along > >>the way. The sync framework allows this. It encapsulates > >>synchronisation events in file descriptors. The application can > >>request a sync point for the completion of each piece of work. Drivers > >>should also take sync points in with each new work request and not > >>schedule the work to start until the sync has been signalled. > >> > >>This patch adds sync framework support to the exec buffer IOCTL. A > >>sync point can be passed in to stall execution of the batch buffer > >>until signalled. And a sync point can be returned after each batch > >>buffer submission which will be signalled upon that batch buffer's > >>completion. > >> > >>At present, the input sync point is simply waited on synchronously > >>inside the exec buffer IOCTL call. Once the GPU scheduler arrives, > >>this will be handled asynchronously inside the scheduler and the IOCTL > >>can return without having to wait. > >> > >>Note also that the scheduler will re-order the execution of batch > >>buffers, e.g. because a batch buffer is stalled on a sync point and > >>cannot be submitted yet but other, independent, batch buffers are > >>being presented to the driver. This means that the timeline within the > >>sync points returned cannot be global to the engine. Instead they must > >>be kept per context per engine (the scheduler may not re-order batches > >>within a context). Hence the timeline cannot be based on the existing > >>seqno values but must be a new implementation. > >> > >>This patch is a port of work by several people that has been pulled > >>across from Android. It has been updated several times across several > >>patches. Rather than attempt to port each individual patch, this > >>version is the finished product as a single patch. The various > >>contributors/authors along the way (in addition to myself) were: > >> Satyanantha RamaGopal M > >> Tvrtko Ursulin > >> Michel Thierry > >> Arun Siluvery > >> > >>[new patch in series] > >> > >>For: VIZ-5190 > >>Signed-off-by: John Harrison > >>--- > >> drivers/gpu/drm/i915/i915_drv.h| 6 ++ > >> drivers/gpu/drm/i915/i915_gem.c| 84 > >> > >> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 90 > >>-- > >> include/uapi/drm/i915_drm.h| 16 +- > >> 4 files changed, 188 insertions(+), 8 deletions(-) > >> > >>diff --git a/drivers/gpu/drm/i915/i915_drv.h > >>b/drivers/gpu/drm/i915/i915_drv.h > >>index d7f1aa5..cf6b7cd 100644 > >>--- a/drivers/gpu/drm/i915/i915_drv.h > >>+++ b/drivers/gpu/drm/i915/i915_drv.h > >>@@ -2168,6 +2168,7 @@ struct drm_i915_gem_request { > >> struct list_head delay_free_list; > >> bool cancelled; > >> bool irq_enabled; > >>+bool fence_external; > >> > >> /** On Which ring this request was generated */ > >> struct drm_i915_private *i915; > >>@@ -2252,6 +2253,11 @@ void i915_gem_request_notify(struct > >>intel_engine_cs *ring); > >> int i915_create_fence_timeline(struct drm_device *dev, > >> struct intel_context *ctx, > >> struct intel_engine_cs *ring); > >>+#ifdef CONFIG_SYNC > >>+struct sync_fence; > >>+int i915_create_sync_fence(struct drm_i915_gem_request *req, int > >>*fence_fd); > >>+bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct > >>sync_fence *fence); > >>+#endif > >> > >> static inline bool i915_gem_request_completed(struct > >>drm_i915_gem_request *req) > >> { > >>diff --git a/drivers/gpu/drm/i915/i915_gem.c > >>b/drivers/gpu/drm/i915/i915_gem.c > >>index 3f20087..de93422 100644 > >>--- a/drivers/gpu/drm/i915/i915_gem.c > >>+++ b/drivers/gpu/drm/i915/i915_gem.c > >>@@ -37,6 +37,9 @@ > >> #include > >> #include > >> #include > >>+#ifdef CONFIG_SYNC > >>+#include <../drivers/staging/android/sync.h> > >>+#endif > >> > >> #define RQ_BUG_ON(expr) > >> > >>@@ -2549,6 +2552,15 @@ void __i915_add_request(struct > >>drm_i915_gem_request *request, > >> */ > >> i915_gem_request_submit(request); > >> > >>+/* > >>+ * If an external sync point has been requested for this request > >>then > >>+ * it can be waited on without the driver's knowledge, i.e. without > >>+ * calling __i915_wait_request(). Thus interrupts must be enabled > >>+ * from the start rather than only on demand. > >>+ */ > >>+if (request->fence
Re: [Intel-gfx] [RFC 9/9] drm/i915: Add sync framework support to execbuff IOCTL
On 28/10/15 13:01, John Harrison wrote: On 27/07/2015 14:00, Tvrtko Ursulin wrote: [snip] +if (!sync_fence) { +put_unused_fd(fd); +*fence_fd = -1; +return -ENOMEM; +} + +sync_fence_install(sync_fence, fd); +*fence_fd = fd; + +// Necessary??? Who does the put??? +fence_get(&req->fence); sync_fence_release? Yes but where? Does the driver need to call this? Is it userland's responsibility? Does it happen automatically when the fd is closed? Do we even need to do the _get() in the first place? It seems to be working in that I don't get any unexpected free of the fence and I don't get huge numbers of leaked fences. However, it would be nice to know how it is working! When the fd is closed, implicitly or explicitly (close(2)/exit(2)/any process termination), kernel will automatically call sync_fence_release via the file_operations set in the inode->f_ops at sync_fence creation time (Actually not when the fd is closed but when the last instance of struct file * in question goes away.) +return 1; +} + +if (atomic_read(&fence->status) == 0) { +if (!i915_safe_to_ignore_fence(ring, fence)) +ret = sync_fence_wait(fence, 1000); I expect you have to wait indefinitely here, not just for one second. Causing the driver to wait indefinitely under userland control is surely a Bad Thing(tm)? Okay, this is done before acquiring the mutex lock and presumably the wait can be interrupted, e.g. if the user land process gets a KILL signal. It still seems a bad idea to wait forever. Various bits of Android generally use a timeout of either 1s or 3s. Daniel or anyone else, any views of driver time outs? It is slightly irrelevant since this is a temporary code path before the scheduler lands. But I don't see it makes sense to have made up timeouts. If userspace gave us something to wait on, then I think we should wait until it is done or it fails. It is not blocking the driver but is running on the behalf of the same process which passed in the fd to wait on. So the process can only block itself. Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC 9/9] drm/i915: Add sync framework support to execbuff IOCTL
On 27/07/2015 14:00, Tvrtko Ursulin wrote: On 07/17/2015 03:31 PM, john.c.harri...@intel.com wrote: From: John Harrison Various projects desire a mechanism for managing dependencies between work items asynchronously. This can also include work items across complete different and independent systems. For example, an application wants to retreive a frame from a video in device, using it for rendering on a GPU then send it to the video out device for display all without having to stall waiting for completion along the way. The sync framework allows this. It encapsulates synchronisation events in file descriptors. The application can request a sync point for the completion of each piece of work. Drivers should also take sync points in with each new work request and not schedule the work to start until the sync has been signalled. This patch adds sync framework support to the exec buffer IOCTL. A sync point can be passed in to stall execution of the batch buffer until signalled. And a sync point can be returned after each batch buffer submission which will be signalled upon that batch buffer's completion. At present, the input sync point is simply waited on synchronously inside the exec buffer IOCTL call. Once the GPU scheduler arrives, this will be handled asynchronously inside the scheduler and the IOCTL can return without having to wait. Note also that the scheduler will re-order the execution of batch buffers, e.g. because a batch buffer is stalled on a sync point and cannot be submitted yet but other, independent, batch buffers are being presented to the driver. This means that the timeline within the sync points returned cannot be global to the engine. Instead they must be kept per context per engine (the scheduler may not re-order batches within a context). Hence the timeline cannot be based on the existing seqno values but must be a new implementation. This patch is a port of work by several people that has been pulled across from Android. It has been updated several times across several patches. Rather than attempt to port each individual patch, this version is the finished product as a single patch. The various contributors/authors along the way (in addition to myself) were: Satyanantha RamaGopal M Tvrtko Ursulin Michel Thierry Arun Siluvery [new patch in series] For: VIZ-5190 Signed-off-by: John Harrison --- drivers/gpu/drm/i915/i915_drv.h| 6 ++ drivers/gpu/drm/i915/i915_gem.c| 84 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 90 -- include/uapi/drm/i915_drm.h| 16 +- 4 files changed, 188 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d7f1aa5..cf6b7cd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2168,6 +2168,7 @@ struct drm_i915_gem_request { struct list_head delay_free_list; bool cancelled; bool irq_enabled; +bool fence_external; /** On Which ring this request was generated */ struct drm_i915_private *i915; @@ -2252,6 +2253,11 @@ void i915_gem_request_notify(struct intel_engine_cs *ring); int i915_create_fence_timeline(struct drm_device *dev, struct intel_context *ctx, struct intel_engine_cs *ring); +#ifdef CONFIG_SYNC +struct sync_fence; +int i915_create_sync_fence(struct drm_i915_gem_request *req, int *fence_fd); +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *fence); +#endif static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3f20087..de93422 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -37,6 +37,9 @@ #include #include #include +#ifdef CONFIG_SYNC +#include <../drivers/staging/android/sync.h> +#endif #define RQ_BUG_ON(expr) @@ -2549,6 +2552,15 @@ void __i915_add_request(struct drm_i915_gem_request *request, */ i915_gem_request_submit(request); +/* + * If an external sync point has been requested for this request then + * it can be waited on without the driver's knowledge, i.e. without + * calling __i915_wait_request(). Thus interrupts must be enabled + * from the start rather than only on demand. + */ +if (request->fence_external) +i915_gem_request_enable_interrupt(request); Maybe then fence_exported would be clearer, fence_external at first sounds like it is coming from another driver or something. Turns out it is not necessary anyway as mentioned below. + if (i915.enable_execlists) ret = ring->emit_request(request); else { @@ -2857,6 +2869,78 @@ static uint32_t i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *t return seqno; } +#ifdef CONFIG_SYNC +int
Re: [Intel-gfx] [RFC 9/9] drm/i915: Add sync framework support to execbuff IOCTL
On 07/17/2015 03:31 PM, john.c.harri...@intel.com wrote: From: John Harrison Various projects desire a mechanism for managing dependencies between work items asynchronously. This can also include work items across complete different and independent systems. For example, an application wants to retreive a frame from a video in device, using it for rendering on a GPU then send it to the video out device for display all without having to stall waiting for completion along the way. The sync framework allows this. It encapsulates synchronisation events in file descriptors. The application can request a sync point for the completion of each piece of work. Drivers should also take sync points in with each new work request and not schedule the work to start until the sync has been signalled. This patch adds sync framework support to the exec buffer IOCTL. A sync point can be passed in to stall execution of the batch buffer until signalled. And a sync point can be returned after each batch buffer submission which will be signalled upon that batch buffer's completion. At present, the input sync point is simply waited on synchronously inside the exec buffer IOCTL call. Once the GPU scheduler arrives, this will be handled asynchronously inside the scheduler and the IOCTL can return without having to wait. Note also that the scheduler will re-order the execution of batch buffers, e.g. because a batch buffer is stalled on a sync point and cannot be submitted yet but other, independent, batch buffers are being presented to the driver. This means that the timeline within the sync points returned cannot be global to the engine. Instead they must be kept per context per engine (the scheduler may not re-order batches within a context). Hence the timeline cannot be based on the existing seqno values but must be a new implementation. This patch is a port of work by several people that has been pulled across from Android. It has been updated several times across several patches. Rather than attempt to port each individual patch, this version is the finished product as a single patch. The various contributors/authors along the way (in addition to myself) were: Satyanantha RamaGopal M Tvrtko Ursulin Michel Thierry Arun Siluvery [new patch in series] For: VIZ-5190 Signed-off-by: John Harrison --- drivers/gpu/drm/i915/i915_drv.h| 6 ++ drivers/gpu/drm/i915/i915_gem.c| 84 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 90 -- include/uapi/drm/i915_drm.h| 16 +- 4 files changed, 188 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d7f1aa5..cf6b7cd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2168,6 +2168,7 @@ struct drm_i915_gem_request { struct list_head delay_free_list; bool cancelled; bool irq_enabled; + bool fence_external; /** On Which ring this request was generated */ struct drm_i915_private *i915; @@ -2252,6 +2253,11 @@ void i915_gem_request_notify(struct intel_engine_cs *ring); int i915_create_fence_timeline(struct drm_device *dev, struct intel_context *ctx, struct intel_engine_cs *ring); +#ifdef CONFIG_SYNC +struct sync_fence; +int i915_create_sync_fence(struct drm_i915_gem_request *req, int *fence_fd); +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *fence); +#endif static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3f20087..de93422 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -37,6 +37,9 @@ #include #include #include +#ifdef CONFIG_SYNC +#include <../drivers/staging/android/sync.h> +#endif #define RQ_BUG_ON(expr) @@ -2549,6 +2552,15 @@ void __i915_add_request(struct drm_i915_gem_request *request, */ i915_gem_request_submit(request); + /* +* If an external sync point has been requested for this request then +* it can be waited on without the driver's knowledge, i.e. without +* calling __i915_wait_request(). Thus interrupts must be enabled +* from the start rather than only on demand. +*/ + if (request->fence_external) + i915_gem_request_enable_interrupt(request); Maybe then fence_exported would be clearer, fence_external at first sounds like it is coming from another driver or something. + if (i915.enable_execlists) ret = ring->emit_request(request); else { @@ -2857,6 +2869,78 @@ static uint32_t i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *t return seqno; } +#ifdef CONFIG_SYNC +int i915_create_sync_fence(struct drm_i91
[Intel-gfx] [RFC 9/9] drm/i915: Add sync framework support to execbuff IOCTL
From: John Harrison Various projects desire a mechanism for managing dependencies between work items asynchronously. This can also include work items across complete different and independent systems. For example, an application wants to retreive a frame from a video in device, using it for rendering on a GPU then send it to the video out device for display all without having to stall waiting for completion along the way. The sync framework allows this. It encapsulates synchronisation events in file descriptors. The application can request a sync point for the completion of each piece of work. Drivers should also take sync points in with each new work request and not schedule the work to start until the sync has been signalled. This patch adds sync framework support to the exec buffer IOCTL. A sync point can be passed in to stall execution of the batch buffer until signalled. And a sync point can be returned after each batch buffer submission which will be signalled upon that batch buffer's completion. At present, the input sync point is simply waited on synchronously inside the exec buffer IOCTL call. Once the GPU scheduler arrives, this will be handled asynchronously inside the scheduler and the IOCTL can return without having to wait. Note also that the scheduler will re-order the execution of batch buffers, e.g. because a batch buffer is stalled on a sync point and cannot be submitted yet but other, independent, batch buffers are being presented to the driver. This means that the timeline within the sync points returned cannot be global to the engine. Instead they must be kept per context per engine (the scheduler may not re-order batches within a context). Hence the timeline cannot be based on the existing seqno values but must be a new implementation. This patch is a port of work by several people that has been pulled across from Android. It has been updated several times across several patches. Rather than attempt to port each individual patch, this version is the finished product as a single patch. The various contributors/authors along the way (in addition to myself) were: Satyanantha RamaGopal M Tvrtko Ursulin Michel Thierry Arun Siluvery [new patch in series] For: VIZ-5190 Signed-off-by: John Harrison --- drivers/gpu/drm/i915/i915_drv.h| 6 ++ drivers/gpu/drm/i915/i915_gem.c| 84 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 90 -- include/uapi/drm/i915_drm.h| 16 +- 4 files changed, 188 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d7f1aa5..cf6b7cd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2168,6 +2168,7 @@ struct drm_i915_gem_request { struct list_head delay_free_list; bool cancelled; bool irq_enabled; + bool fence_external; /** On Which ring this request was generated */ struct drm_i915_private *i915; @@ -2252,6 +2253,11 @@ void i915_gem_request_notify(struct intel_engine_cs *ring); int i915_create_fence_timeline(struct drm_device *dev, struct intel_context *ctx, struct intel_engine_cs *ring); +#ifdef CONFIG_SYNC +struct sync_fence; +int i915_create_sync_fence(struct drm_i915_gem_request *req, int *fence_fd); +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *fence); +#endif static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3f20087..de93422 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -37,6 +37,9 @@ #include #include #include +#ifdef CONFIG_SYNC +#include <../drivers/staging/android/sync.h> +#endif #define RQ_BUG_ON(expr) @@ -2549,6 +2552,15 @@ void __i915_add_request(struct drm_i915_gem_request *request, */ i915_gem_request_submit(request); + /* +* If an external sync point has been requested for this request then +* it can be waited on without the driver's knowledge, i.e. without +* calling __i915_wait_request(). Thus interrupts must be enabled +* from the start rather than only on demand. +*/ + if (request->fence_external) + i915_gem_request_enable_interrupt(request); + if (i915.enable_execlists) ret = ring->emit_request(request); else { @@ -2857,6 +2869,78 @@ static uint32_t i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *t return seqno; } +#ifdef CONFIG_SYNC +int i915_create_sync_fence(struct drm_i915_gem_request *req, int *fence_fd) +{ + char ring_name[] = "i915_ring0"; + struct sync_fence *sync_fence; + int fd; + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { +