Now that the user can opt-out of implicit fencing, we need to give them back control over the fencing. We employ sync_file to wrap our drm_i915_gem_request and provide an fd that userspace can merge with other sync_file fds and pass back to the kernel to wait upon before future execution.
Testcase: igt/gem_exec_fence Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahti...@linux.intel.com> --- drivers/gpu/drm/i915/Kconfig | 1 + drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 54 +++++++++++++++++++++++++++--- include/uapi/drm/i915_drm.h | 35 ++++++++++++++++++- 4 files changed, 86 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index df96aed6975a..8e93b61bd8c3 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -19,6 +19,7 @@ config DRM_I915 select INPUT if ACPI select ACPI_VIDEO if ACPI select ACPI_BUTTON if ACPI + select SYNC_FILE help Choose this option if you have a system that has "Intel Graphics Media Accelerator" or "HD Graphics" integrated graphics, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 4bfd5a582aa2..d022fb207a42 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -344,6 +344,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_COHERENT_PHYS_GTT: case I915_PARAM_HAS_EXEC_SOFTPIN: case I915_PARAM_HAS_EXEC_ASYNC: + case I915_PARAM_HAS_EXEC_FENCE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -2534,7 +2535,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0ae2feb24632..83533fb6cf46 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -28,6 +28,7 @@ #include <linux/dma_remapping.h> #include <linux/reservation.h> +#include <linux/sync_file.h> #include <linux/uaccess.h> #include <drm/drmP.h> @@ -1588,6 +1589,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_execbuffer_params *params = ¶ms_master; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 dispatch_flags; + struct dma_fence *in_fence = NULL; + struct sync_file *out_fence = NULL; + int out_fence_fd = -1; int ret; bool need_relocs; @@ -1631,6 +1635,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, dispatch_flags |= I915_DISPATCH_RS; } + if (args->flags & I915_EXEC_FENCE_IN) { + in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); + if (!in_fence) { + ret = -EINVAL; + goto pre_mutex_err; + } + } + + if (args->flags & I915_EXEC_FENCE_OUT) { + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); + if (out_fence_fd < 0) { + ret = out_fence_fd; + out_fence_fd = -1; + goto pre_mutex_err; + } + } + /* Take a local wakeref for preparing to dispatch the execbuf as * we expect to access the hardware fairly frequently in the * process. Upon first dispatch, we acquire another prolonged @@ -1775,6 +1796,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err_batch_unpin; } + if (in_fence) { + ret = i915_gem_request_await_dma_fence(params->request, + in_fence); + if (ret < 0) + goto err_request; + } + + if (out_fence_fd != -1) { + out_fence = sync_file_create(¶ms->request->fence); + if (!out_fence) { + ret = -ENOMEM; + goto err_request; + } + } + /* Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this * request is retired will the the batch_obj be moved onto the @@ -1802,6 +1838,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ret = execbuf_submit(params, args, &eb->vmas); err_request: __i915_add_request(params->request, ret == 0); + if (out_fence) { + if (ret == 0) { + fd_install(out_fence_fd, out_fence->file); + args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ + args->rsvd2 |= (u64)out_fence_fd << 32; + out_fence_fd = -1; + } else { + fput(out_fence->file); + } + } err_batch_unpin: /* @@ -1823,6 +1869,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* intel_gpu_busy should also get a ref, so it will free when the device * is really idle. */ intel_runtime_pm_put(dev_priv); + if (out_fence_fd != -1) + put_unused_fd(out_fence_fd); + dma_fence_put(in_fence); return ret; } @@ -1930,11 +1979,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EINVAL; } - if (args->rsvd2 != 0) { - DRM_DEBUG("dirty rvsd2 field\n"); - return -EINVAL; - } - exec2_list = drm_malloc_gfp(args->buffer_count, sizeof(*exec2_list), GFP_TEMPORARY); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 4bd83c0b07db..90082269fb50 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_OVERLAY_PUT_IMAGE 0x27 #define DRM_I915_OVERLAY_ATTRS 0x28 #define DRM_I915_GEM_EXECBUFFER2 0x29 +#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2 #define DRM_I915_GET_SPRITE_COLORKEY 0x2a #define DRM_I915_SET_SPRITE_COLORKEY 0x2b #define DRM_I915_GEM_WAIT 0x2c @@ -279,6 +280,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) #define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2) #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) #define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) @@ -401,6 +403,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_ASYNC 42 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support - + * both being able to pass in a sync_file fd to wait upon before executing, + * and being able to return a new sync_file fd that is signaled when the + * current request is complete. + */ +#define I915_PARAM_HAS_EXEC_FENCE 43 typedef struct drm_i915_getparam { __s32 param; @@ -854,7 +862,32 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_RESOURCE_STREAMER (1<<15) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1) +/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing + * the batch. + * + * Returns -EINVAL if the sync_file fd cannot be found. + */ +#define I915_EXEC_FENCE_IN (1<<16) + +/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd + * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given + * to the caller, and it should be close() after use. (The fd is a regular + * file descriptor and will be cleaned up on process termination. It holds + * a reference to the request, but nothing else.) + * + * The sync_file fd can be combined with other sync_file and passed either + * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip + * will only occur after this request completes), or to other devices. + * + * Using I915_EXEC_FENCE_OUT requires use of + * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written + * back to userspace. Failure to do so will cause the out-fence to always + * be reported as zero, and the real fence fd to be leaked. + */ +#define I915_EXEC_FENCE_OUT (1<<17) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 2.10.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx