On 12/11/2015 05:11 AM, john.c.harri...@intel.com wrote:
> From: John Harrison <john.c.harri...@intel.com>
> 
> The fence object used inside the request structure requires a sequence
> number. Although this is not used by the i915 driver itself, it could
> potentially be used by non-i915 code if the fence is passed outside of
> the driver. This is the intention as it allows external kernel drivers
> and user applications to wait on batch buffer completion
> asynchronously via the dma-buf fence API.
> 
> To ensure that such external users are not confused by strange things
> happening with the seqno, this patch adds a per-context timeline
> that can provide a guaranteed in-order seqno value for the fence. This
> is safe because the scheduler will not re-order batch buffers within a
> context - they are considered to be mutually dependent.
> 
> v2: New patch in series.
> 
> v3: Renamed/retyped timeline structure fields after review comments by
> Tvrtko Ursulin.
> 
> Added context information to the timeline's name string for better
> identification in debugfs output.
> 
> For: VIZ-5190
> Signed-off-by: John Harrison <john.c.harri...@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         | 25 ++++++++---
>  drivers/gpu/drm/i915/i915_gem.c         | 80 +++++++++++++++++++++++++++++----
>  drivers/gpu/drm/i915/i915_gem_context.c | 15 ++++++-
>  drivers/gpu/drm/i915/intel_lrc.c        |  8 ++++
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
>  5 files changed, 111 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index caf7897..7d6a7c0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -841,6 +841,15 @@ struct i915_ctx_hang_stats {
>       bool banned;
>  };
>  
> +struct i915_fence_timeline {
> +     char        name[32];
> +     unsigned    fence_context;
> +     unsigned    next;
> +
> +     struct intel_context *ctx;
> +     struct intel_engine_cs *ring;
> +};
> +
>  /* This must match up with the value previously used for execbuf2.rsvd1. */
>  #define DEFAULT_CONTEXT_HANDLE 0
>  
> @@ -885,6 +894,7 @@ struct intel_context {
>               struct drm_i915_gem_object *state;
>               struct intel_ringbuffer *ringbuf;
>               int pin_count;
> +             struct i915_fence_timeline fence_timeline;
>       } engine[I915_NUM_RINGS];
>  
>       struct list_head link;
> @@ -2177,13 +2187,10 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
>  struct drm_i915_gem_request {
>       /**
>        * Underlying object for implementing the signal/wait stuff.
> -      * NB: Never call fence_later() or return this fence object to user
> -      * land! Due to lazy allocation, scheduler re-ordering, pre-emption,
> -      * etc., there is no guarantee at all about the validity or
> -      * sequentiality of the fence's seqno! It is also unsafe to let
> -      * anything outside of the i915 driver get hold of the fence object
> -      * as the clean up when decrementing the reference count requires
> -      * holding the driver mutex lock.
> +      * NB: Never return this fence object to user land! It is unsafe to
> +      * let anything outside of the i915 driver get hold of the fence
> +      * object as the clean up when decrementing the reference count
> +      * requires holding the driver mutex lock.
>        */
>       struct fence fence;
>  
> @@ -2263,6 +2270,10 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
>                          struct drm_i915_gem_request **req_out);
>  void i915_gem_request_cancel(struct drm_i915_gem_request *req);
>  
> +int i915_create_fence_timeline(struct drm_device *dev,
> +                            struct intel_context *ctx,
> +                            struct intel_engine_cs *ring);
> +
>  static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
>  {
>       return fence_is_signaled(&req->fence);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0801738..7a37fb7 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2665,9 +2665,32 @@ static const char *i915_gem_request_get_driver_name(struct fence *req_fence)
>  
>  static const char *i915_gem_request_get_timeline_name(struct fence *req_fence)
>  {
> -     struct drm_i915_gem_request *req = container_of(req_fence,
> -                                              typeof(*req), fence);
> -     return req->ring->name;
> +     struct drm_i915_gem_request *req;
> +     struct i915_fence_timeline *timeline;
> +
> +     req = container_of(req_fence, typeof(*req), fence);
> +     timeline = &req->ctx->engine[req->ring->id].fence_timeline;
> +
> +     return timeline->name;
> +}
> +
> +static void i915_gem_request_timeline_value_str(struct fence *req_fence, char *str, int size)
> +{
> +     struct drm_i915_gem_request *req;
> +
> +     req = container_of(req_fence, typeof(*req), fence);
> +
> +     /* Last signalled timeline value ??? */
> +     snprintf(str, size, "? [%d]"/*, timeline->value*/, req->ring->get_seqno(req->ring, true));
> +}
> +
> +static void i915_gem_request_fence_value_str(struct fence *req_fence, char *str, int size)
> +{
> +     struct drm_i915_gem_request *req;
> +
> +     req = container_of(req_fence, typeof(*req), fence);
> +
> +     snprintf(str, size, "%d [%d]", req->fence.seqno, req->seqno);
>  }
>  
>  static const struct fence_ops i915_gem_request_fops = {
> @@ -2677,8 +2700,49 @@ static const struct fence_ops i915_gem_request_fops = {
>       .release                = i915_gem_request_free,
>       .get_driver_name        = i915_gem_request_get_driver_name,
>       .get_timeline_name      = i915_gem_request_get_timeline_name,
> +     .fence_value_str        = i915_gem_request_fence_value_str,
> +     .timeline_value_str     = i915_gem_request_timeline_value_str,
>  };
>  
> +int i915_create_fence_timeline(struct drm_device *dev,
> +                            struct intel_context *ctx,
> +                            struct intel_engine_cs *ring)
> +{
> +     struct i915_fence_timeline *timeline;
> +
> +     timeline = &ctx->engine[ring->id].fence_timeline;
> +
> +     if (timeline->ring)
> +             return 0;
> +
> +     timeline->fence_context = fence_context_alloc(1);
> +
> +     /*
> +      * Start the timeline from seqno 0 as this is a special value
> +      * that is reserved for invalid sync points.
> +      */
> +     timeline->next       = 1;
> +     timeline->ctx        = ctx;
> +     timeline->ring       = ring;
> +
> +     snprintf(timeline->name, sizeof(timeline->name), "%d>%s:%d", timeline->fence_context, ring->name, ctx->user_handle);
> +
> +     return 0;
> +}
> +
> +static unsigned i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *timeline)
> +{
> +     unsigned seqno;
> +
> +     seqno = timeline->next;
> +
> +     /* Reserve zero for invalid */
> +     if (++timeline->next == 0)
> +             timeline->next = 1;
> +
> +     return seqno;
> +}
> +
>  int i915_gem_request_alloc(struct intel_engine_cs *ring,
>                          struct intel_context *ctx,
>                          struct drm_i915_gem_request **req_out)
> @@ -2714,7 +2778,9 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
>               goto err;
>       }
>  
> -     fence_init(&req->fence, &i915_gem_request_fops, &ring->fence_lock, ring->fence_context, req->seqno);
> +     fence_init(&req->fence, &i915_gem_request_fops, &ring->fence_lock,
> +                ctx->engine[ring->id].fence_timeline.fence_context,
> +                i915_fence_timeline_get_next_seqno(&ctx->engine[ring->id].fence_timeline));
>  
>       /*
>        * Reserve space in the ring buffer for all the commands required to
> @@ -4765,7 +4831,7 @@ i915_gem_init_hw(struct drm_device *dev)
>  {
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct intel_engine_cs *ring;
> -     int ret, i, j, fence_base;
> +     int ret, i, j;
>  
>       if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
>               return -EIO;
> @@ -4835,16 +4901,12 @@ i915_gem_init_hw(struct drm_device *dev)
>       if (ret)
>               goto out;
>  
> -     fence_base = fence_context_alloc(I915_NUM_RINGS);
> -
>       /* Now it is safe to go back round and do everything else: */
>       for_each_ring(ring, dev_priv, i) {
>               struct drm_i915_gem_request *req;
>  
>               WARN_ON(!ring->default_context);
>  
> -             ring->fence_context = fence_base + i;
> -
>               ret = i915_gem_request_alloc(ring, ring->default_context, &req);
>               if (ret) {
>                       i915_gem_cleanup_ringbuffer(dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 43b1c73..2798ddc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -266,7 +266,7 @@ i915_gem_create_context(struct drm_device *dev,
>  {
>       const bool is_global_default_ctx = file_priv == NULL;
>       struct intel_context *ctx;
> -     int ret = 0;
> +     int i, ret = 0;
>  
>       BUG_ON(!mutex_is_locked(&dev->struct_mutex));
>  
> @@ -274,6 +274,19 @@ i915_gem_create_context(struct drm_device *dev,
>       if (IS_ERR(ctx))
>               return ctx;
>  
> +     if (!i915.enable_execlists) {
> +             struct intel_engine_cs *ring;
> +
> +             /* Create a per context timeline for fences */
> +             for_each_ring(ring, to_i915(dev), i) {
> +                     ret = i915_create_fence_timeline(dev, ctx, ring);
> +                     if (ret) {
> +                             DRM_ERROR("Fence timeline creation failed for legacy %s: %p\n", ring->name, ctx);
> +                             goto err_destroy;
> +                     }
> +             }
> +     }
> +
>       if (is_global_default_ctx && ctx->legacy_hw_ctx.rcs_state) {
>               /* We may need to do things with the shrinker which
>                * require us to immediately switch back to the default
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b8c8f9b..2b56651 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2489,6 +2489,14 @@ int intel_lr_context_deferred_alloc(struct intel_context *ctx,
>               goto error_ringbuf;
>       }
>  
> +     /* Create a per context timeline for fences */
> +     ret = i915_create_fence_timeline(dev, ctx, ring);
> +     if (ret) {
> +             DRM_ERROR("Fence timeline creation failed for ring %s, ctx %p\n",
> +                       ring->name, ctx);
> +             goto error_ringbuf;
> +     }
> +
>       ctx->engine[ring->id].ringbuf = ringbuf;
>       ctx->engine[ring->id].state = ctx_obj;
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 4547645..356b6a8 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -349,7 +349,6 @@ struct  intel_engine_cs {
>        */
>       u32 (*get_cmd_length_mask)(u32 cmd_header);
>  
> -     unsigned fence_context;
>       spinlock_t fence_lock;
>  };
>  
> 

Yeah we definitely want this, but it'll have to be reconciled with the 
different request->fence patches.  I'm not sure if it would be easier to move 
to per-context seqnos first or go this route and deal with the mismatch between 
global and per-ctx.
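
For reference, a rough sketch of what an external kernel consumer waiting on one of
these exported fences could look like with the struct fence API. Purely illustrative
and not part of the patch; the function name is made up, and it assumes the caller
already holds a reference on the fence:

#include <linux/errno.h>
#include <linux/fence.h>
#include <linux/jiffies.h>

/*
 * Illustrative only: an external driver waiting for an i915 batch buffer
 * to complete via the exported struct fence. Assumes the caller took a
 * reference (fence_get()) when it was handed the fence.
 */
static int example_wait_on_batch(struct fence *batch_fence)
{
	long ret;

	/* Cheap completion check; the per-context timeline keeps seqnos
	 * in order, so the value is meaningful to outside observers. */
	if (fence_is_signaled(batch_fence))
		return 0;

	/* Interruptible wait, give up after one second. */
	ret = fence_wait_timeout(batch_fence, true, msecs_to_jiffies(1000));
	if (ret == 0)
		return -ETIMEDOUT;
	if (ret < 0)
		return ret;

	return 0;
}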

Jesse
