[Intel-gfx] [RFC 7/9] drm/i915: Interrupt driven fences
From: John Harrison john.c.harri...@intel.com The intended usage model for struct fence is that the signalled status should be set on demand rather than polled. That is, there should not be a need for a 'signaled' function to be called every time the status is queried. Instead, 'something' should be done to enable a signal callback from the hardware which will update the state directly. In the case of requests, this is the seqno update interrupt. The idea is that this callback will only be enabled on demand when something actually tries to wait on the fence. This change removes the polling test and replaces it with the callback scheme. Each fence is added to a 'please poke me' list at the start of i915_add_request(). The interrupt handler then scans through the 'poke me' list when a new seqno pops out and signals any matching fence/request. The fence is then removed from the list so the entire request stack does not need to be scanned every time. Note that the fence is added to the list before the commands to generate the seqno interrupt are added to the ring. Thus the sequence is guaranteed to be race free if the interrupt is already enabled. Note that the interrupt is only enabled on demand (i.e. when __wait_request() is called). Thus there is still a potential race when enabling the interrupt as the request may already have completed. However, this is simply solved by calling the interrupt processing code immediately after enabling the interrupt and thereby checking for already completed requests. Lastly, the ring clean up code has the possibility to cancel outstanding requests (e.g. because TDR has reset the ring). These requests will never get signalled and so must be removed from the signal list manually. This is done by setting a 'cancelled' flag and then calling the regular notify/retire code path rather than attempting to duplicate the list manipulation and clean up code in multiple places. 
This also avoids any race condition where the cancellation request might occur after/during the completion interrupt actually arriving. v2: Updated to take advantage of the request unreference no longer requiring the mutex lock. For: VIZ-5190 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 8 ++ drivers/gpu/drm/i915/i915_gem.c | 132 +--- drivers/gpu/drm/i915/i915_irq.c | 2 + drivers/gpu/drm/i915/intel_lrc.c| 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 6 files changed, 136 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 61c3db2..d7f1aa5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2163,7 +2163,11 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, struct drm_i915_gem_request { /** Underlying object for implementing the signal/wait stuff. */ struct fence fence; + struct list_head signal_list; + struct list_head unsignal_list; struct list_head delay_free_list; + bool cancelled; + bool irq_enabled; /** On Which ring this request was generated */ struct drm_i915_private *i915; @@ -2241,6 +2245,10 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, struct drm_i915_gem_request **req_out); void i915_gem_request_cancel(struct drm_i915_gem_request *req); +void i915_gem_request_submit(struct drm_i915_gem_request *req); +void i915_gem_request_enable_interrupt(struct drm_i915_gem_request *req); +void i915_gem_request_notify(struct intel_engine_cs *ring); + int i915_create_fence_timeline(struct drm_device *dev, struct intel_context *ctx, struct intel_engine_cs *ring); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 482835a..7c589a9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1222,6 +1222,11 @@ int __i915_wait_request(struct drm_i915_gem_request *req, if (list_empty(req-list)) return 0; 
+ /* +* Enable interrupt completion of the request. +*/ + i915_gem_request_enable_interrupt(req); + if (i915_gem_request_completed(req)) return 0; @@ -1382,6 +1387,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) list_del_init(request-list); i915_gem_request_remove_from_client(request); + /* In case the request is still in the signal pending list */ + if (!list_empty(request-signal_list)) + request-cancelled = true; + i915_gem_request_unreference(request); } @@ -2534,6 +2543,12 @@ void __i915_add_request(struct drm_i915_gem_request *request, */ request-postfix = intel_ring_get_tail(ringbuf); + /* +* Add the
[Intel-gfx] [RFC 6/9] drm/i915: Delay the freeing of requests until retire time
From: John Harrison john.c.harri...@intel.com The request structure is reference counted. When the count reached zero, the request was immediately freed and all associated objects were unreferenced/deallocated. This meant that the driver mutex lock must be held at the point where the count reaches zero. This was fine while all references were held internally to the driver. However, the plan is to allow the underlying fence object (and hence the request itself) to be returned to other drivers and to userland. External users cannot be expected to acquire a driver private mutex lock. Rather than attempt to disentangle the request structure from the driver mutex lock, the decision was to defer the free code until a later (safer) point. Hence this patch changes the unreference callback to merely move the request onto a delayed free list. The driver's retire worker thread will then process the list and actually call the free function on the requests. [new patch in series] For: VIZ-5190 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 22 +++--- drivers/gpu/drm/i915/i915_gem.c | 41 + drivers/gpu/drm/i915/intel_display.c| 2 +- drivers/gpu/drm/i915/intel_lrc.c| 2 ++ drivers/gpu/drm/i915/intel_pm.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.h | 4 7 files changed, 50 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 88a4746..61c3db2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2161,14 +2161,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, * initial reference taken using kref_init */ struct drm_i915_gem_request { - /** -* Underlying object for implementing the signal/wait stuff. -* NB: Never return this fence object to user land! 
It is unsafe to -* let anything outside of the i915 driver get hold of the fence -* object as the clean up when decrementing the reference count -* requires holding the driver mutex lock. -*/ + /** Underlying object for implementing the signal/wait stuff. */ struct fence fence; + struct list_head delay_free_list; /** On Which ring this request was generated */ struct drm_i915_private *i915; @@ -2281,21 +2276,10 @@ i915_gem_request_reference(struct drm_i915_gem_request *req) static inline void i915_gem_request_unreference(struct drm_i915_gem_request *req) { - WARN_ON(!mutex_is_locked(req-ring-dev-struct_mutex)); - fence_put(req-fence); -} - -static inline void -i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req) -{ - struct drm_device *dev; - if (!req) return; - dev = req-ring-dev; - if (kref_put_mutex(req-fence.refcount, fence_release, dev-struct_mutex)) - mutex_unlock(dev-struct_mutex); + fence_put(req-fence); } static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index af79716..482835a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2616,10 +2616,27 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv, } } -static void i915_gem_request_free(struct fence *req_fence) +static void i915_gem_request_release(struct fence *req_fence) { struct drm_i915_gem_request *req = container_of(req_fence, typeof(*req), fence); + struct intel_engine_cs *ring = req-ring; + struct drm_i915_private *dev_priv = to_i915(ring-dev); + unsigned long flags; + + /* +* Need to add the request to a deferred dereference list to be +* processed at a mutex lock safe time. 
+*/ + spin_lock_irqsave(ring-delayed_free_lock, flags); + list_add_tail(req-delay_free_list, ring-delayed_free_list); + spin_unlock_irqrestore(ring-delayed_free_lock, flags); + + queue_delayed_work(dev_priv-wq, dev_priv-mm.retire_work, 0); +} + +static void i915_gem_request_free(struct drm_i915_gem_request *req) +{ struct intel_context *ctx = req-ctx; BUG_ON(!mutex_is_locked(req-ring-dev-struct_mutex)); @@ -2696,7 +2713,7 @@ static const struct fence_ops i915_gem_request_fops = { .enable_signaling = i915_gem_request_enable_signaling, .signaled = i915_gem_request_is_completed, .wait = fence_default_wait, - .release= i915_gem_request_free, + .release= i915_gem_request_release, .fence_value_str= i915_fence_value_str, .timeline_value_str = i915_fence_timeline_value_str, }; @@ -2992,6 +3009,21 @@
[Intel-gfx] [RFC 0/9] Convert requests to use struct fence
From: John Harrison john.c.harri...@intel.com There is a construct in the linux kernel called 'struct fence' that is intended to keep track of work that is executed on hardware. I.e. it solves the basic problem that the driver's 'struct drm_i915_gem_request' is trying to address. The request structure does quite a lot more than simply track the execution progress so is very definitely still required. However, the basic completion status side could be updated to use the ready made fence implementation and gain all the advantages that provides. Using the struct fence object also has the advantage that the fence can be used outside of the i915 driver (by other drivers or by userland applications). That is the basis of the dma-buf synchronisation API and allows asynchronous tracking of work completion. In this case, it allows applications to be signalled directly when a batch buffer completes without having to make an IOCTL call into the driver. This is work that was planned since the conversion of the driver from being seqno value based to being request structure based. This patch series does that work. 
[Patches against drm-intel-nightly tree fetched 15/07/2015] John Harrison (7): drm/i915: Convert requests to use struct fence drm/i915: Removed now redundant parameter to i915_gem_request_completed() drm/i915: Add per context timelines to fence object drm/i915: Delay the freeing of requests until retire time drm/i915: Interrupt driven fences drm/i915: Updated request structure tracing drm/i915: Add sync framework support to execbuff IOCTL Maarten Lankhorst (1): android: add sync_fence_create_dma Tvrtko Ursulin (1): staging/android/sync: Support sync points created from dma-fences drivers/gpu/drm/i915/i915_debugfs.c| 2 +- drivers/gpu/drm/i915/i915_drv.h| 73 +++--- drivers/gpu/drm/i915/i915_gem.c| 369 +++-- drivers/gpu/drm/i915/i915_gem_context.c| 15 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 90 ++- drivers/gpu/drm/i915/i915_irq.c| 2 +- drivers/gpu/drm/i915/i915_trace.h | 7 +- drivers/gpu/drm/i915/intel_display.c | 4 +- drivers/gpu/drm/i915/intel_lrc.c | 12 + drivers/gpu/drm/i915/intel_pm.c| 6 +- drivers/gpu/drm/i915/intel_ringbuffer.c| 4 + drivers/gpu/drm/i915/intel_ringbuffer.h| 7 + drivers/staging/android/sync.c | 13 +- drivers/staging/android/sync.h | 12 +- drivers/staging/android/sync_debug.c | 42 ++-- include/uapi/drm/i915_drm.h| 16 +- 16 files changed, 583 insertions(+), 91 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 9/9] drm/i915: Add sync framework support to execbuff IOCTL
From: John Harrison john.c.harri...@intel.com Various projects desire a mechanism for managing dependencies between work items asynchronously. This can also include work items across completely different and independent systems. For example, an application wants to retrieve a frame from a video in device, using it for rendering on a GPU then send it to the video out device for display all without having to stall waiting for completion along the way. The sync framework allows this. It encapsulates synchronisation events in file descriptors. The application can request a sync point for the completion of each piece of work. Drivers should also take sync points in with each new work request and not schedule the work to start until the sync has been signalled. This patch adds sync framework support to the exec buffer IOCTL. A sync point can be passed in to stall execution of the batch buffer until signalled. And a sync point can be returned after each batch buffer submission which will be signalled upon that batch buffer's completion. At present, the input sync point is simply waited on synchronously inside the exec buffer IOCTL call. Once the GPU scheduler arrives, this will be handled asynchronously inside the scheduler and the IOCTL can return without having to wait. Note also that the scheduler will re-order the execution of batch buffers, e.g. because a batch buffer is stalled on a sync point and cannot be submitted yet but other, independent, batch buffers are being presented to the driver. This means that the timeline within the sync points returned cannot be global to the engine. Instead they must be kept per context per engine (the scheduler may not re-order batches within a context). Hence the timeline cannot be based on the existing seqno values but must be a new implementation. This patch is a port of work by several people that has been pulled across from Android. It has been updated several times across several patches. 
Rather than attempt to port each individual patch, this version is the finished product as a single patch. The various contributors/authors along the way (in addition to myself) were: Satyanantha RamaGopal M rama.gopal.m.satyanan...@intel.com Tvrtko Ursulin tvrtko.ursu...@intel.com Michel Thierry michel.thie...@intel.com Arun Siluvery arun.siluv...@linux.intel.com [new patch in series] For: VIZ-5190 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.h| 6 ++ drivers/gpu/drm/i915/i915_gem.c| 84 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 90 -- include/uapi/drm/i915_drm.h| 16 +- 4 files changed, 188 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d7f1aa5..cf6b7cd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2168,6 +2168,7 @@ struct drm_i915_gem_request { struct list_head delay_free_list; bool cancelled; bool irq_enabled; + bool fence_external; /** On Which ring this request was generated */ struct drm_i915_private *i915; @@ -2252,6 +2253,11 @@ void i915_gem_request_notify(struct intel_engine_cs *ring); int i915_create_fence_timeline(struct drm_device *dev, struct intel_context *ctx, struct intel_engine_cs *ring); +#ifdef CONFIG_SYNC +struct sync_fence; +int i915_create_sync_fence(struct drm_i915_gem_request *req, int *fence_fd); +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *fence); +#endif static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3f20087..de93422 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -37,6 +37,9 @@ #include linux/swap.h #include linux/pci.h #include linux/dma-buf.h +#ifdef CONFIG_SYNC +#include ../drivers/staging/android/sync.h +#endif #define RQ_BUG_ON(expr) @@ -2549,6 +2552,15 @@ void __i915_add_request(struct 
drm_i915_gem_request *request, */ i915_gem_request_submit(request); + /* +* If an external sync point has been requested for this request then +* it can be waited on without the driver's knowledge, i.e. without +* calling __i915_wait_request(). Thus interrupts must be enabled +* from the start rather than only on demand. +*/ + if (request-fence_external) + i915_gem_request_enable_interrupt(request); + if (i915.enable_execlists) ret = ring-emit_request(request); else { @@ -2857,6 +2869,78 @@ static uint32_t i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *t return seqno; } +#ifdef CONFIG_SYNC +int i915_create_sync_fence(struct
[Intel-gfx] [RFC 3/9] drm/i915: Convert requests to use struct fence
From: John Harrison john.c.harri...@intel.com There is a construct in the linux kernel called 'struct fence' that is intended to keep track of work that is executed on hardware. I.e. it solves the basic problem that the drivers 'struct drm_i915_gem_request' is trying to address. The request structure does quite a lot more than simply track the execution progress so is very definitely still required. However, the basic completion status side could be updated to use the ready made fence implementation and gain all the advantages that provides. This patch makes the first step of integrating a struct fence into the request. It replaces the explicit reference count with that of the fence. It also replaces the 'is completed' test with the fence's equivalent. Currently, that simply chains on to the original request implementation. A future patch will improve this. For: VIZ-5190 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 45 + drivers/gpu/drm/i915/i915_gem.c | 58 ++--- drivers/gpu/drm/i915/intel_lrc.c| 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ 5 files changed, 80 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cf6761c..79d346c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -50,6 +50,7 @@ #include linux/intel-iommu.h #include linux/kref.h #include linux/pm_qos.h +#include linux/fence.h /* General customization: */ @@ -2150,7 +2151,17 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, * initial reference taken using kref_init */ struct drm_i915_gem_request { - struct kref ref; + /** +* Underlying object for implementing the signal/wait stuff. +* NB: Never call fence_later() or return this fence object to user +* land! 
Due to lazy allocation, scheduler re-ordering, pre-emption, +* etc., there is no guarantee at all about the validity or +* sequentiality of the fence's seqno! It is also unsafe to let +* anything outside of the i915 driver get hold of the fence object +* as the clean up when decrementing the reference count requires +* holding the driver mutex lock. +*/ + struct fence fence; /** On Which ring this request was generated */ struct drm_i915_private *i915; @@ -2227,7 +2238,13 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, struct intel_context *ctx, struct drm_i915_gem_request **req_out); void i915_gem_request_cancel(struct drm_i915_gem_request *req); -void i915_gem_request_free(struct kref *req_ref); + +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, + bool lazy_coherency) +{ + return fence_is_signaled(req-fence); +} + int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, struct drm_file *file); @@ -2247,7 +2264,7 @@ static inline struct drm_i915_gem_request * i915_gem_request_reference(struct drm_i915_gem_request *req) { if (req) - kref_get(req-ref); + fence_get(req-fence); return req; } @@ -2255,7 +2272,7 @@ static inline void i915_gem_request_unreference(struct drm_i915_gem_request *req) { WARN_ON(!mutex_is_locked(req-ring-dev-struct_mutex)); - kref_put(req-ref, i915_gem_request_free); + fence_put(req-fence); } static inline void @@ -2267,7 +2284,7 @@ i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req) return; dev = req-ring-dev; - if (kref_put_mutex(req-ref, i915_gem_request_free, dev-struct_mutex)) + if (kref_put_mutex(req-fence.refcount, fence_release, dev-struct_mutex)) mutex_unlock(dev-struct_mutex); } @@ -2284,12 +2301,6 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, } /* - * XXX: i915_gem_request_completed should be here but currently needs the - * definition of i915_seqno_passed() which is below. 
It will be moved in - * a later patch when the call to i915_seqno_passed() is obsoleted... - */ - -/* * A command that requires special handling by the command parser. */ struct drm_i915_cmd_descriptor { @@ -2851,18 +2862,6 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) return (int32_t)(seq1 - seq2) = 0; } -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, - bool lazy_coherency) -{ - u32 seqno; - - BUG_ON(req == NULL); - - seqno = req-ring-get_seqno(req-ring, lazy_coherency); - - return i915_seqno_passed(seqno, req-seqno); -} - int __must_check i915_gem_get_seqno(struct
[Intel-gfx] [RFC 5/9] drm/i915: Add per context timelines to fence object
From: John Harrison john.c.harri...@intel.com The fence object used inside the request structure requires a sequence number. Although this is not used by the i915 driver itself, it could potentially be used by non-i915 code if the fence is passed outside of the driver. This is the intention as it allows external kernel drivers and user applications to wait on batch buffer completion asynchronously via the dma-buff fence API. To ensure that such external users are not confused by strange things happening with the seqno, this patch adds in a per context timeline that can provide a guaranteed in-order seqno value for the fence. This is safe because the scheduler will not re-order batch buffers within a context - they are considered to be mutually dependent. [new patch in series] For: VIZ-5190 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 25 drivers/gpu/drm/i915/i915_gem.c | 69 ++--- drivers/gpu/drm/i915/i915_gem_context.c | 15 ++- drivers/gpu/drm/i915/intel_lrc.c| 8 drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 5 files changed, 103 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0c7df46..88a4746 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -840,6 +840,15 @@ struct i915_ctx_hang_stats { bool banned; }; +struct i915_fence_timeline { + unsignedfence_context; + uint32_tcontext; + uint32_tnext; + + struct intel_context *ctx; + struct intel_engine_cs *ring; +}; + /* This must match up with the value previously used for execbuf2.rsvd1. 
*/ #define DEFAULT_CONTEXT_HANDLE 0 @@ -885,6 +894,7 @@ struct intel_context { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; int pin_count; + struct i915_fence_timeline fence_timeline; } engine[I915_NUM_RINGS]; struct list_head link; @@ -2153,13 +2163,10 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, struct drm_i915_gem_request { /** * Underlying object for implementing the signal/wait stuff. -* NB: Never call fence_later() or return this fence object to user -* land! Due to lazy allocation, scheduler re-ordering, pre-emption, -* etc., there is no guarantee at all about the validity or -* sequentiality of the fence's seqno! It is also unsafe to let -* anything outside of the i915 driver get hold of the fence object -* as the clean up when decrementing the reference count requires -* holding the driver mutex lock. +* NB: Never return this fence object to user land! It is unsafe to +* let anything outside of the i915 driver get hold of the fence +* object as the clean up when decrementing the reference count +* requires holding the driver mutex lock. 
*/ struct fence fence; @@ -2239,6 +2246,10 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, struct drm_i915_gem_request **req_out); void i915_gem_request_cancel(struct drm_i915_gem_request *req); +int i915_create_fence_timeline(struct drm_device *dev, + struct intel_context *ctx, + struct intel_engine_cs *ring); + static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req) { return fence_is_signaled(req-fence); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3970250..af79716 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2671,6 +2671,25 @@ static bool i915_gem_request_is_completed(struct fence *req_fence) return i915_seqno_passed(seqno, req-seqno); } +static void i915_fence_timeline_value_str(struct fence *fence, char *str, int size) +{ + struct drm_i915_gem_request *req; + + req = container_of(fence, typeof(*req), fence); + + /* Last signalled timeline value ??? */ + snprintf(str, size, ? [%d]/*, tl-value*/, req-ring-get_seqno(req-ring, true)); +} + +static void i915_fence_value_str(struct fence *fence, char *str, int size) +{ + struct drm_i915_gem_request *req; + + req = container_of(fence, typeof(*req), fence); + + snprintf(str, size, %d [%d], req-fence.seqno, req-seqno); +} + static const struct fence_ops i915_gem_request_fops = { .get_driver_name= i915_gem_request_get_driver_name, .get_timeline_name = i915_gem_request_get_timeline_name, @@ -2678,8 +2697,48 @@ static const struct fence_ops i915_gem_request_fops = { .signaled = i915_gem_request_is_completed, .wait = fence_default_wait, .release= i915_gem_request_free, + .fence_value_str
[Intel-gfx] [RFC 8/9] drm/i915: Updated request structure tracing
From: John Harrison john.c.harri...@intel.com Added the '_complete' trace event which occurs when a fence/request is signaled as complete. Also moved the notify event from the IRQ handler code to inside the notify function itself. For: VIZ-5190 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem.c | 3 +++ drivers/gpu/drm/i915/i915_irq.c | 2 -- drivers/gpu/drm/i915/i915_trace.h | 7 +-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7c589a9..3f20087 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2752,6 +2752,8 @@ void i915_gem_request_notify(struct intel_engine_cs *ring) u32 seqno; LIST_HEAD(free_list); + trace_i915_gem_request_notify(ring); + if (list_empty(ring-fence_signal_list)) return; @@ -2764,6 +2766,7 @@ void i915_gem_request_notify(struct intel_engine_cs *ring) continue; fence_signal_locked(req-fence); + trace_i915_gem_request_complete(req); } list_del_init(req-signal_list); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e446509..d4500cc 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -851,8 +851,6 @@ static void notify_ring(struct intel_engine_cs *ring) if (!intel_ring_initialized(ring)) return; - trace_i915_gem_request_notify(ring); - i915_gem_request_notify(ring); wake_up_all(ring-irq_queue); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 2f34c47..f455194 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -532,16 +532,19 @@ TRACE_EVENT(i915_gem_request_notify, __field(u32, dev) __field(u32, ring) __field(u32, seqno) +__field(bool, is_empty) ), TP_fast_assign( __entry-dev = ring-dev-primary-index; __entry-ring = ring-id; __entry-seqno = ring-get_seqno(ring, false); + __entry-is_empty = list_empty(ring-fence_signal_list); ), - 
TP_printk(dev=%u, ring=%u, seqno=%u, - __entry-dev, __entry-ring, __entry-seqno) + TP_printk(dev=%u, ring=%u, seqno=%u, empty=%d, + __entry-dev, __entry-ring, __entry-seqno, + __entry-is_empty) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 12/39] drm/i915: Added scheduler hook when closing DRM file handles
From: John Harrison john.c.harri...@intel.com The scheduler decouples the submission of batch buffers to the driver with submission of batch buffers to the hardware. Thus it is possible for an application to submit work, then close the DRM handle and free up all the resources that piece of work wishes to use before the work has even been submitted to the hardware. To prevent this, the scheduler needs to be informed of the DRM close event so that it can force through any outstanding work attributed to that file handle. Change-Id: I24ac056c062b075ff1cc5e2ed2d3fa8e17e85951 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_dma.c | 3 ++ drivers/gpu/drm/i915/i915_scheduler.c | 66 +++ drivers/gpu/drm/i915/i915_scheduler.h | 2 ++ 3 files changed, 71 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 5e63076..0a25017 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -47,6 +47,7 @@ #include linux/vga_switcheroo.h #include linux/slab.h #include acpi/video.h +#include i915_scheduler.h #include linux/pm.h #include linux/pm_runtime.h #include linux/oom.h @@ -1186,6 +1187,8 @@ void i915_driver_lastclose(struct drm_device *dev) void i915_driver_preclose(struct drm_device *dev, struct drm_file *file) { + i915_scheduler_closefile(dev, file); + mutex_lock(dev-struct_mutex); i915_gem_context_close(dev, file); i915_gem_release(dev, file); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index f7fd9a4..50bcccb 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -812,3 +812,69 @@ bool i915_scheduler_is_request_tracked(struct drm_i915_gem_request *req, return true; } + +int i915_scheduler_closefile(struct drm_device *dev, struct drm_file *file) +{ + struct i915_scheduler_queue_entry *node; + struct drm_i915_private*dev_priv = dev-dev_private; + struct i915_scheduler 
*scheduler = dev_priv-scheduler; + struct drm_i915_gem_request*req; + struct intel_engine_cs *ring; + int i, ret; + unsigned long flags; + boolfound; + + if (!scheduler) + return 0; + + for_each_ring(ring, dev_priv, i) { + do { + spin_lock_irqsave(scheduler-lock, flags); + + found = false; + list_for_each_entry(node, scheduler-node_queue[ring-id], link) { + if (I915_SQS_IS_COMPLETE(node)) + continue; + + if (node-params.file != file) + continue; + + found = true; + req = node-params.request; + i915_gem_request_reference(req); + break; + } + + spin_unlock_irqrestore(scheduler-lock, flags); + + if (found) { + do { + mutex_lock(dev-struct_mutex); + ret = i915_wait_request(req); + mutex_unlock(dev-struct_mutex); + if (ret == -EAGAIN) + msleep(20); + } while (ret == -EAGAIN); + + mutex_lock(dev-struct_mutex); + i915_gem_request_unreference(req); + mutex_unlock(dev-struct_mutex); + } + } while (found); + } + + spin_lock_irqsave(scheduler-lock, flags); + for_each_ring(ring, dev_priv, i) { + list_for_each_entry(node, scheduler-node_queue[ring-id], link) { + if (node-params.file != file) + continue; + + WARN_ON(!I915_SQS_IS_COMPLETE(node)); + + node-params.file = NULL; + } + } + spin_unlock_irqrestore(scheduler-lock, flags); + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 88cbfba..fbb6f7b 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -86,6 +86,8 @@ enum { booli915_scheduler_is_enabled(struct drm_device *dev); int i915_scheduler_init(struct drm_device *dev); +int i915_scheduler_closefile(struct drm_device *dev, +struct drm_file *file); int i915_scheduler_queue_execbuffer(struct
[Intel-gfx] [RFC 17/39] drm/i915: Hook scheduler node clean up into retire requests
From: John Harrison john.c.harri...@intel.com The scheduler keeps its own lock on various DRM objects in order to guarantee safe access long after the original execbuff IOCTL has completed. This is especially important when pre-emption is enabled as the batch buffer might need to be submitted to the hardware multiple times. This patch hooks the clean up of these locks into the request retire function. The request can only be retired after it has completed on the hardware and thus is no longer eligible for re-submission. Thus there is no point holding on to the locks beyond that time. For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem.c | 3 +++ drivers/gpu/drm/i915/i915_scheduler.c | 51 --- drivers/gpu/drm/i915/i915_scheduler.h | 1 + 3 files changed, 39 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 77a3b27..cb5af5d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1405,6 +1405,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) if (!list_empty(request-signal_list)) request-cancelled = true; + if (request-scheduler_qe) + i915_gem_scheduler_clean_node(request-scheduler_qe); + i915_gem_request_unreference(request); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index f5fa968..df2e27f 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -432,6 +432,38 @@ int i915_scheduler_handle_irq(struct intel_engine_cs *ring) return 0; } +void i915_gem_scheduler_clean_node(struct i915_scheduler_queue_entry *node) +{ + uint32_t i; + + if (WARN_ON(!I915_SQS_IS_COMPLETE(node))) + return; + + if (node-params.batch_obj) { + /* The batch buffer must be unpinned before it is unreferenced +* otherwise the unpin fails with a missing vma!? 
*/ + if (node-params.dispatch_flags I915_DISPATCH_SECURE) + i915_gem_execbuff_release_batch_obj(node-params.batch_obj); + + node-params.batch_obj = NULL; + } + + /* Release the locked buffers: */ + for (i = 0; i node-num_objs; i++) { + drm_gem_object_unreference( + node-saved_objects[i].obj-base); + } + kfree(node-saved_objects); + node-saved_objects = NULL; + node-num_objs = 0; + + /* Context too: */ + if (node-params.ctx) { + i915_gem_context_unreference(node-params.ctx); + node-params.ctx = NULL; + } +} + static int i915_scheduler_remove(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring-dev-dev_private; @@ -441,7 +473,7 @@ static int i915_scheduler_remove(struct intel_engine_cs *ring) int flying = 0, queued = 0; int ret = 0; booldo_submit; - uint32_ti, min_seqno; + uint32_tmin_seqno; struct list_headremove; if (list_empty(scheduler-node_queue[ring-id])) @@ -535,21 +567,8 @@ static int i915_scheduler_remove(struct intel_engine_cs *ring) node = list_first_entry(remove, typeof(*node), link); list_del(node-link); - /* The batch buffer must be unpinned before it is unreferenced -* otherwise the unpin fails with a missing vma!? 
*/ - if (node-params.dispatch_flags I915_DISPATCH_SECURE) - i915_gem_execbuff_release_batch_obj(node-params.batch_obj); - - /* Release the locked buffers: */ - for (i = 0; i node-num_objs; i++) { - drm_gem_object_unreference( - node-saved_objects[i].obj-base); - } - kfree(node-saved_objects); - - /* Context too: */ - if (node-params.ctx) - i915_gem_context_unreference(node-params.ctx); + /* Free up all the DRM object references */ + i915_gem_scheduler_clean_node(node); /* And anything else owned by the node: */ node-params.request-scheduler_qe = NULL; diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 15878a4..73c5e7d 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -88,6 +88,7 @@ booli915_scheduler_is_enabled(struct drm_device *dev); int i915_scheduler_init(struct drm_device *dev); int i915_scheduler_closefile(struct drm_device *dev, struct drm_file *file); +void
[Intel-gfx] [RFC 07/39] drm/i915: Start of GPU scheduler
From: John Harrison john.c.harri...@intel.com Initial creation of scheduler source files. Note that this patch implements most of the scheduler functionality but does not hook it in to the driver yet. It also leaves the scheduler code in 'pass through' mode so that even when it is hooked in, it will not actually do very much. This allows the hooks to be added one at a time in byte size chunks and only when the scheduler is finally enabled at the end does anything start happening. The general theory of operation is that when batch buffers are submitted to the driver, the execbuffer() code assigns a unique request and then packages up all the information required to execute the batch buffer at a later time. This package is given over to the scheduler which adds it to an internal node list. The scheduler also scans the list of objects associated with the batch buffer and compares them against the objects already in use by other buffers in the node list. If matches are found then the new batch buffer node is marked as being dependent upon the matching node. The same is done for the context object. The scheduler also bumps up the priority of such matching nodes on the grounds that the more dependencies a given batch buffer has the more important it is likely to be. The scheduler aims to have a given (tuneable) number of batch buffers in flight on the hardware at any given time. If fewer than this are currently executing when a new node is queued, then the node is passed straight through to the submit function. Otherwise it is simply added to the queue and the driver returns back to user land. As each batch buffer completes, it raises an interrupt which wakes up the scheduler. Note that it is possible for multiple buffers to complete before the IRQ handler gets to run. Further, it is possible for the seqno values to be un-ordered (particularly once pre-emption is enabled). However, the scheduler keeps the list of executing buffers in order of hardware submission. 
Thus it can scan through the list until a matching seqno is found and then mark all in flight nodes from that point on as completed. A deferred work queue is also poked by the interrupt handler. When this wakes up it can do more involved processing such as actually removing completed nodes from the queue and freeing up the resources associated with them (internal memory allocations, DRM object references, context reference, etc.). The work handler also checks the in flight count and calls the submission code if a new slot has appeared. When the scheduler's submit code is called, it scans the queued node list for the highest priority node that has no unmet dependencies. Note that the dependency calculation is complex as it must take inter-ring dependencies and potential preemptions into account. Note also that in the future this will be extended to include external dependencies such as the Android Native Sync file descriptors and/or the linux dma-buff synchronisation scheme. If a suitable node is found then it is sent to execbuff_final() for submission to the hardware. The in flight count is then re-checked and a new node popped from the list if appropriate. Note that this patch does not implement pre-emptive scheduling. Only basic scheduling by re-ordering batch buffer submission is currently implemented. 
Change-Id: I1e08f59e650a3c2bbaaa9de7627da33849b06106 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h | 4 + drivers/gpu/drm/i915/i915_gem.c | 5 + drivers/gpu/drm/i915/i915_scheduler.c | 776 ++ drivers/gpu/drm/i915/i915_scheduler.h | 91 5 files changed, 877 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_scheduler.c create mode 100644 drivers/gpu/drm/i915/i915_scheduler.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 47a74114..c367b39 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -9,6 +9,7 @@ ccflags-y := -Werror # core driver code i915-y := i915_drv.o \ i915_params.o \ + i915_scheduler.o \ i915_suspend.o \ i915_sysfs.o \ intel_pm.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a680778..7d2a494 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1700,6 +1700,8 @@ struct i915_execbuffer_params { struct drm_i915_gem_request *request; }; +struct i915_scheduler; + struct drm_i915_private { struct drm_device *dev; struct kmem_cache *objects; @@ -1932,6 +1934,8 @@ struct drm_i915_private { struct i915_runtime_pm pm; + struct i915_scheduler *scheduler; + /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct { int (*execbuf_submit)(struct i915_execbuffer_params *params,
[Intel-gfx] [RFC 15/39] drm/i915: Keep the reserved space mechanism happy
From: John Harrison john.c.harri...@intel.com Ring space is reserved when constructing a request to ensure that the subsequent 'add_request()' call cannot fail due to waiting for space on a busy or broken GPU. However, the scheduler jumps in to the middle of the execbuffer process between request creation and request submission. Thus it needs to cancel the reserved space when the request is simply added to the scheduler's queue and not yet submitted. Similarly, it needs to re-reserve the space when it finally does want to send the batch buffer to the hardware. For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 7 +++ drivers/gpu/drm/i915/i915_scheduler.c | 4 drivers/gpu/drm/i915/intel_lrc.c | 13 +++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 364e9cc..75d018d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1317,6 +1317,10 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) /* The mutex must be acquired before calling this function */ BUG_ON(!mutex_is_locked(params-dev-struct_mutex)); + ret = intel_ring_reserve_space(params-request); + if (ret) + return ret; + intel_runtime_pm_get(dev_priv); /* @@ -1392,6 +1396,9 @@ error: */ intel_runtime_pm_put(dev_priv); + if (ret) + intel_ring_reserved_space_cancel(params-request-ringbuf); + return ret; } diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 3494fd5..e145829 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -95,6 +95,8 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) qe-scheduler_index = scheduler-index++; + intel_ring_reserved_space_cancel(qe-params.request-ringbuf); + scheduler-flags[qe-params.ring-id] |= i915_sf_submitting; 
ret = dev_priv-gt.execbuf_final(qe-params); scheduler-flags[qe-params.ring-id] = ~i915_sf_submitting; @@ -126,6 +128,8 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) node-stamp = stamp; i915_gem_request_reference(node-params.request); + intel_ring_reserved_space_cancel(node-params.request-ringbuf); + BUG_ON(node-params.request-scheduler_qe); node-params.request-scheduler_qe = node; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a8c78ec..76d5023 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -908,13 +908,17 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params) /* The mutex must be acquired before calling this function */ BUG_ON(!mutex_is_locked(params-dev-struct_mutex)); + ret = intel_logical_ring_reserve_space(params-request); + if (ret) + return ret; + /* * Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. */ ret = logical_ring_invalidate_all_caches(params-request); if (ret) - return ret; + goto err; if (ring == dev_priv-ring[RCS] params-instp_mode != dev_priv-relative_constants_mode) { @@ -938,13 +942,18 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params) ret = ring-emit_bb_start(params-request, exec_start, params-dispatch_flags); if (ret) - return ret; + goto err; trace_i915_gem_ring_dispatch(params-request, params-dispatch_flags); i915_gem_execbuffer_retire_commands(params); return 0; + +err: + intel_ring_reserved_space_cancel(params-request-ringbuf); + + return ret; } void intel_execlists_retire_requests(struct intel_engine_cs *ring) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 00/39] GPU scheduler for i915 driver
From: John Harrison john.c.harri...@intel.com Implemented a batch buffer submission scheduler for the i915 DRM driver. The general theory of operation is that when batch buffers are submitted to the driver, the execbuffer() code assigns a unique seqno value and then packages up all the information required to execute the batch buffer at a later time. This package is given over to the scheduler which adds it to an internal node list. The scheduler also scans the list of objects associated with the batch buffer and compares them against the objects already in use by other buffers in the node list. If matches are found then the new batch buffer node is marked as being dependent upon the matching node. The same is done for the context object. The scheduler also bumps up the priority of such matching nodes on the grounds that the more dependencies a given batch buffer has the more important it is likely to be. The scheduler aims to have a given (tuneable) number of batch buffers in flight on the hardware at any given time. If fewer than this are currently executing when a new node is queued, then the node is passed straight through to the submit function. Otherwise it is simply added to the queue and the driver returns back to user land. As each batch buffer completes, it raises an interrupt which wakes up the scheduler. Note that it is possible for multiple buffers to complete before the IRQ handler gets to run. Further, the seqno values of the individual buffers are not necessary incrementing as the scheduler may have re-ordered their submission. However, the scheduler keeps the list of executing buffers in order of hardware submission. Thus it can scan through the list until a matching seqno is found and then mark all in flight nodes from that point on as completed. A deferred work queue is also poked by the interrupt handler. 
When this wakes up it can do more involved processing such as actually removing completed nodes from the queue and freeing up the resources associated with them (internal memory allocations, DRM object references, context reference, etc.). The work handler also checks the in flight count and calls the submission code if a new slot has appeared. When the scheduler's submit code is called, it scans the queued node list for the highest priority node that has no unmet dependencies. Note that the dependency calculation is complex as it must take inter-ring dependencies and potential preemptions into account. Note also that in the future this will be extended to include external dependencies such as the Android Native Sync file descriptors and/or the linux dma-buff synchronisation scheme. If a suitable node is found then it is sent to execbuff_final() for submission to the hardware. The in flight count is then re-checked and a new node popped from the list if appropriate. The scheduler also allows high priority batch buffers (e.g. from a desktop compositor) to jump ahead of whatever is already running if the underlying hardware supports pre-emption. In this situation, any work that was pre-empted is returned to the queued list ready to be resubmitted when no more high priority work is outstanding. 
[Patches against drm-intel-nightly tree fetched 15/07/2015 with struct fence conversion patches applied] Dave Gordon (1): drm/i915: Updating assorted register and status page definitions John Harrison (38): drm/i915: Add total count to context status debugfs output drm/i915: Explicit power enable during deferred context initialisation drm/i915: Prelude to splitting i915_gem_do_execbuffer in two drm/i915: Split i915_dem_do_execbuffer() in half drm/i915: Re-instate request-uniq because it is extremely useful drm/i915: Start of GPU scheduler drm/i915: Prepare retire_requests to handle out-of-order seqnos drm/i915: Added scheduler hook into i915_gem_complete_requests_ring() drm/i915: Disable hardware semaphores when GPU scheduler is enabled drm/i915: Force MMIO flips when scheduler enabled drm/i915: Added scheduler hook when closing DRM file handles drm/i915: Added deferred work handler for scheduler drm/i915: Redirect execbuffer_final() via scheduler drm/i915: Keep the reserved space mechanism happy drm/i915: Added tracking/locking of batch buffer objects drm/i915: Hook scheduler node clean up into retire requests drm/i915: Added scheduler interrupt handler hook drm/i915: Added scheduler support to __wait_request() calls drm/i915: Added scheduler support to page fault handler drm/i915: Added scheduler flush calls to ring throttle and idle functions drm/i915: Add scheduler hook to GPU reset drm/i915: Added a module parameter for allowing scheduler overrides drm/i915: Support for 'unflushed' ring idle drm/i915: Defer seqno allocation until actual hardware submission time drm/i915: Added immediate submission override to scheduler drm/i915: Add sync wait support to scheduler drm/i915: Connecting execbuff fences to scheduler drm/i915:
[Intel-gfx] [RFC 03/39] drm/i915: Explicit power enable during deferred context initialisation
From: John Harrison john.c.harri...@intel.com A later patch in this series re-organises the batch buffer submission code. Part of that is to reduce the scope of a pm_get/put pair. Specifically, they previously wrapped the entire submission path from the very start to the very end, now they only wrap the actual hardware submission part in the back half. While that is a good thing in general, it causes a problem with the deferred context initialisation. That is done quite early on in the execbuf code path - it happens at context validation time rather than context switch time. Some of the deferred work requires the power to be enabled. Hence this patch adds an explicit power reference count to the deferred initialisation code itself. Change-Id: Id7b1535dfd8809a2bd5546272de2bbec39da2868 Issue: GMINL-5159 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 18dbd5c..8aa9a18 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2317,12 +2317,15 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, WARN_ON(ctx-legacy_hw_ctx.rcs_state != NULL); WARN_ON(ctx-engine[ring-id].state); + intel_runtime_pm_get(dev-dev_private); + context_size = round_up(get_lr_context_size(ring), 4096); ctx_obj = i915_gem_alloc_object(dev, context_size); if (!ctx_obj) { DRM_DEBUG_DRIVER(Alloc LRC backing obj failed.\n); - return -ENOMEM; + ret = -ENOMEM; + goto error_pm; } if (is_global_default_ctx) { @@ -2331,7 +2334,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, DRM_DEBUG_DRIVER(Pin LRC backing obj failed: %d\n, ret); drm_gem_object_unreference(ctx_obj-base); - return ret; + goto error_pm; } } @@ -2415,6 +2418,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, ctx-rcs_initialized = true; } + intel_runtime_pm_put(dev-dev_private); 
return 0; error: @@ -2428,6 +2432,8 @@ error_unpin_ctx: if (is_global_default_ctx) i915_gem_object_ggtt_unpin(ctx_obj); drm_gem_object_unreference(ctx_obj-base); +error_pm: + intel_runtime_pm_put(dev-dev_private); return ret; } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 05/39] drm/i915: Split i915_gem_do_execbuffer() in half
From: John Harrison john.c.harri...@intel.com Split the execbuffer() function in half. The first half collects and validates all the information requried to process the batch buffer. It also does all the object pinning, relocations, active list management, etc - basically anything that must be done upfront before the IOCTL returns and allows the user land side to start changing/freeing things. The second half does the actual ring submission. This change implements the split but leaves the back half being called directly from the end of the front half. Change-Id: I5e1c77639ce526ab2401b0323186c518bf13da0a For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.h| 11 +++ drivers/gpu/drm/i915/i915_gem.c| 2 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 130 - drivers/gpu/drm/i915/intel_lrc.c | 58 + drivers/gpu/drm/i915/intel_lrc.h | 1 + 5 files changed, 147 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 289ddd6..28d51ac 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1684,10 +1684,18 @@ struct i915_execbuffer_params { struct drm_device *dev; struct drm_file *file; uint32_tdispatch_flags; + uint32_targs_flags; uint32_targs_batch_start_offset; + uint32_targs_batch_len; + uint32_targs_num_cliprects; + uint32_targs_DR1; + uint32_targs_DR4; uint32_tbatch_obj_vm_offset; struct intel_engine_cs *ring; struct drm_i915_gem_object *batch_obj; + struct drm_clip_rect*cliprects; + uint32_tinstp_mask; + int instp_mode; struct intel_context*ctx; struct drm_i915_gem_request *request; }; @@ -1929,6 +1937,7 @@ struct drm_i915_private { int (*execbuf_submit)(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas); + int (*execbuf_final)(struct i915_execbuffer_params *params); int (*init_rings)(struct drm_device *dev); void (*cleanup_ring)(struct intel_engine_cs *ring); void 
(*stop_ring)(struct intel_engine_cs *ring); @@ -2743,9 +2752,11 @@ int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct drm_i915_gem_request *req); void i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params); +void i915_gem_execbuff_release_batch_obj(struct drm_i915_gem_object *batch_obj); int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas); +int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params); int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_execbuffer2(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8150820..2a5667b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5481,11 +5481,13 @@ int i915_gem_init(struct drm_device *dev) if (!i915.enable_execlists) { dev_priv-gt.execbuf_submit = i915_gem_ringbuffer_submission; + dev_priv-gt.execbuf_final = i915_gem_ringbuffer_submission_final; dev_priv-gt.init_rings = i915_gem_init_rings; dev_priv-gt.cleanup_ring = intel_cleanup_ring_buffer; dev_priv-gt.stop_ring = intel_stop_ring_buffer; } else { dev_priv-gt.execbuf_submit = intel_execlists_submission; + dev_priv-gt.execbuf_final = intel_execlists_submission_final; dev_priv-gt.init_rings = intel_logical_rings_init; dev_priv-gt.cleanup_ring = intel_logical_ring_cleanup; dev_priv-gt.stop_ring = intel_logical_ring_stop; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 988ecd4..ba9d595 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1198,14 +1198,10 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args,
[Intel-gfx] [RFC 20/39] drm/i915: Added scheduler support to page fault handler
From: John Harrison john.c.harri...@intel.com GPU page faults can now require scheduler operation in order to complete. For example, in order to free up sufficient memory to handle the fault the handler must wait for a batch buffer to complete that has not even been sent to the hardware yet. Thus EAGAIN no longer means a GPU hang, it can occur under normal operation. Change-Id: Iff6bd2744ef12bb7405fbcd6b43c286caad4141f For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f713cda..dd9ebbe 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1919,10 +1919,15 @@ out: } case -EAGAIN: /* -* EAGAIN means the gpu is hung and we'll wait for the error -* handler to reset everything when re-faulting in +* EAGAIN can mean the gpu is hung and we'll have to wait for +* the error handler to reset everything when re-faulting in * i915_mutex_lock_interruptible. +* +* It can also indicate various other nonfatal errors for which +* the best response is to give other threads a chance to run, +* and then retry the failing operation in its entirety. */ + /*FALLTHRU*/ case 0: case -ERESTARTSYS: case -EINTR: -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 06/39] drm/i915: Re-instate request-uniq because it is extremely useful
From: John Harrison john.c.harri...@intel.com The seqno value cannot always be used when debugging issues via trace points. This is because it can be reset back to start, especially during TDR type tests. Also, when the scheduler arrives the seqno is only valid while a given request is executing on the hardware. While the request is simply queued waiting for submission, it's seqno value will be zero (meaning invalid). For: VIZ-5115 Signed-off-by: John Harrison john.c.harri...@intel.com Reviewed-by: Tomas Elf tomas@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 5 + drivers/gpu/drm/i915/i915_gem.c | 3 ++- drivers/gpu/drm/i915/i915_trace.h | 25 + 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 28d51ac..a680778 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1945,6 +1945,8 @@ struct drm_i915_private { bool edp_low_vswing; + uint32_t request_uniq; + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. @@ -2186,6 +2188,9 @@ struct drm_i915_gem_request { /** GEM sequence number associated with this request. 
*/ uint32_t seqno; + /* Unique identifier which can be used for trace points debug */ + uint32_t uniq; + /** Position in the ringbuffer of the start of the request */ u32 head; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2a5667b..0c407ae 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2828,7 +2828,7 @@ static void i915_fence_value_str(struct fence *fence, char *str, int size) req = container_of(fence, typeof(*req), fence); - snprintf(str, size, %d [%d], req-fence.seqno, req-seqno); + snprintf(str, size, %d [%d:%d], req-fence.seqno, req-uniq, req-seqno); } static const struct fence_ops i915_gem_request_fops = { @@ -2974,6 +2974,7 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, req-i915 = dev_priv; req-ring = ring; + req-uniq = dev_priv-request_uniq++; req-ctx = ctx; i915_gem_context_reference(req-ctx); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index f455194..796c630 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -433,6 +433,7 @@ TRACE_EVENT(i915_gem_ring_sync_to, __field(u32, dev) __field(u32, sync_from) __field(u32, sync_to) +__field(u32, uniq_to) __field(u32, seqno) ), @@ -440,13 +441,14 @@ TRACE_EVENT(i915_gem_ring_sync_to, __entry-dev = from-dev-primary-index; __entry-sync_from = from-id; __entry-sync_to = to_req-ring-id; + __entry-uniq_to = to_req-uniq; __entry-seqno = i915_gem_request_get_seqno(req); ), - TP_printk(dev=%u, sync-from=%u, sync-to=%u, seqno=%u, + TP_printk(dev=%u, sync-from=%u, sync-to=%u, seqno=%u, to_uniq=%u, __entry-dev, __entry-sync_from, __entry-sync_to, - __entry-seqno) + __entry-seqno, __entry-uniq_to) ); TRACE_EVENT(i915_gem_ring_dispatch, @@ -481,6 +483,7 @@ TRACE_EVENT(i915_gem_ring_flush, TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) +__field(u32, uniq) __field(u32, invalidate) __field(u32, flush) ), @@ -488,12 +491,13 @@ TRACE_EVENT(i915_gem_ring_flush, 
TP_fast_assign( __entry-dev = req-ring-dev-primary-index; __entry-ring = req-ring-id; + __entry-uniq = req-uniq; __entry-invalidate = invalidate; __entry-flush = flush; ), - TP_printk(dev=%u, ring=%x, invalidate=%04x, flush=%04x, - __entry-dev, __entry-ring, + TP_printk(dev=%u, ring=%x, request=%u, invalidate=%04x, flush=%04x, + __entry-dev, __entry-ring, __entry-uniq, __entry-invalidate, __entry-flush) ); @@ -504,6 +508,7 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) +__field(u32, uniq)
[Intel-gfx] [RFC 13/39] drm/i915: Added deferred work handler for scheduler
From: John Harrison john.c.harri...@intel.com The scheduler needs to do interrupt triggered work that is too complex to do in the interrupt handler. Thus it requires a deferred work handler to process this work asynchronously. Change-Id: I0f7cc2b6f034a50bf8f7e368b60ad8bafd00f993 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 10 ++ drivers/gpu/drm/i915/i915_gem.c | 2 ++ drivers/gpu/drm/i915/i915_scheduler.c | 23 +-- drivers/gpu/drm/i915/i915_scheduler.h | 1 + 5 files changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0a25017..4d3370f 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1084,6 +1084,9 @@ int i915_driver_unload(struct drm_device *dev) WARN_ON(unregister_oom_notifier(dev_priv-mm.oom_notifier)); unregister_shrinker(dev_priv-mm.shrinker); + /* Cancel the scheduler work handler, which should be idle now. */ + cancel_work_sync(dev_priv-mm.scheduler_work); + io_mapping_free(dev_priv-gtt.mappable); arch_phys_wc_del(dev_priv-gtt.mtrr); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 58f53ec..2b3fab6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1299,6 +1299,16 @@ struct i915_gem_mm { struct delayed_work retire_work; /** +* New scheme is to get an interrupt after every work packet +* in order to allow the low latency scheduling of pending +* packets. The idea behind adding new packets to a pending +* queue rather than directly into the hardware ring buffer +* is to allow high priority packets to over take low priority +* ones. +*/ + struct work_struct scheduler_work; + + /** * When we detect an idle GPU, we want to turn on * powersaving features. 
So once we see that there * are no more requests outstanding and no more diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e3c4032..77a3b27 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5631,6 +5631,8 @@ i915_gem_load(struct drm_device *dev) i915_gem_retire_work_handler); INIT_DELAYED_WORK(dev_priv-mm.idle_work, i915_gem_idle_work_handler); + INIT_WORK(dev_priv-mm.scheduler_work, + i915_gem_scheduler_work_handler); init_waitqueue_head(dev_priv-gpu_error.reset_queue); dev_priv-relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 50bcccb..3494fd5 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -407,12 +407,12 @@ int i915_scheduler_handle_irq(struct intel_engine_cs *ring) i915_scheduler_seqno_complete(ring, seqno); spin_unlock_irqrestore(scheduler-lock, flags); - /* XXX: Need to also call i915_scheduler_remove() via work handler. 
*/ + queue_work(dev_priv-wq, dev_priv-mm.scheduler_work); return 0; } -int i915_scheduler_remove(struct intel_engine_cs *ring) +static int i915_scheduler_remove(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring-dev-dev_private; struct i915_scheduler *scheduler = dev_priv-scheduler; @@ -531,6 +531,25 @@ int i915_scheduler_remove(struct intel_engine_cs *ring) return ret; } +void i915_gem_scheduler_work_handler(struct work_struct *work) +{ + struct intel_engine_cs *ring; + struct drm_i915_private *dev_priv; + struct drm_device *dev; + int i; + + dev_priv = container_of(work, struct drm_i915_private, mm.scheduler_work); + dev = dev_priv-dev; + + mutex_lock(dev-struct_mutex); + + for_each_ring(ring, dev_priv, i) { + i915_scheduler_remove(ring); + } + + mutex_unlock(dev-struct_mutex); +} + static void i915_scheduler_priority_bump_clear(struct i915_scheduler *scheduler) { struct i915_scheduler_queue_entry *node; diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index fbb6f7b..15878a4 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -90,6 +90,7 @@ int i915_scheduler_closefile(struct drm_device *dev, struct drm_file *file); int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe); int i915_scheduler_handle_irq(struct intel_engine_cs *ring); +voidi915_gem_scheduler_work_handler(struct work_struct *work); bool
[Intel-gfx] [RFC 26/39] drm/i915: Added immediate submission override to scheduler
From: John Harrison john.c.harri...@intel.com To aid with debugging issues related to the scheduler, it can be useful to ensure that all batch buffers are submitted immediately rather than queued until later. This change adds an override flag via the module parameter to force instant submission. Change-Id: I7652df53e2d3c3d77d78bebcf99856e2c53f2801 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_scheduler.c | 7 +-- drivers/gpu/drm/i915/i915_scheduler.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 224c8b4..c7139f8 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -238,8 +238,11 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) list_add_tail(node-link, scheduler-node_queue[ring-id]); - not_flying = i915_scheduler_count_flying(scheduler, ring) -scheduler-min_flying; + if (i915.scheduler_override i915_so_submit_on_queue) + not_flying = true; + else + not_flying = i915_scheduler_count_flying(scheduler, ring) +scheduler-min_flying; spin_unlock_irqrestore(scheduler-lock, flags); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 7d743c9..ce94b0b 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -92,6 +92,7 @@ enum { /* Options for 'scheduler_override' module parameter: */ enum { i915_so_direct_submit = (1 0), + i915_so_submit_on_queue = (1 1), }; booli915_scheduler_is_enabled(struct drm_device *dev); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 10/39] drm/i915: Disable hardware semaphores when GPU scheduler is enabled
From: John Harrison john.c.harri...@intel.com Hardware semaphores require seqno values to be continuously incrementing. However, the scheduler's reordering of batch buffers means that the seqno values going through the hardware could be out of order. Thus semaphores can not be used. On the other hand, the scheduler supersedes the need for hardware semaphores anyway. Having one ring stall waiting for something to complete on another ring is inefficient if that ring could be working on some other, independent task. This is what the scheduler is meant to do - keep the hardware as busy as possible by reordering batch buffers to avoid dependency stalls. Change-Id: I95d1fceacd370455a9720d7dca55cfd0a1f6beaa For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 9 + drivers/gpu/drm/i915/i915_scheduler.c | 7 +++ drivers/gpu/drm/i915/i915_scheduler.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 4 4 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index db48aee..abd7efc 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -34,6 +34,7 @@ #include i915_drv.h #include i915_trace.h #include intel_drv.h +#include i915_scheduler.h #include linux/console.h #include linux/module.h @@ -516,6 +517,14 @@ void intel_detect_pch(struct drm_device *dev) bool i915_semaphore_is_enabled(struct drm_device *dev) { + /* Hardware semaphores are not compatible with the scheduler due to the +* seqno values being potentially out of order. However, semaphores are +* also not required as the scheduler will handle inter-ring dependencies +* and try to do so in a way that does not cause dead time on the hardware. 
+*/ + if (i915_scheduler_is_enabled(dev)) + return false; + if (INTEL_INFO(dev)-gen 6) return false; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 0d1cbe3..f7fd9a4 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -38,6 +38,13 @@ static int i915_scheduler_priority_bump(struct i915_scheduler *scheduler struct i915_scheduler_queue_entry *target, uint32_t bump); +bool i915_scheduler_is_enabled(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + + return dev_priv-scheduler != NULL; +} + int i915_scheduler_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev-dev_private; diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 6b2585a..88cbfba 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -84,6 +84,7 @@ enum { i915_sf_submitting = (1 1), }; +booli915_scheduler_is_enabled(struct drm_device *dev); int i915_scheduler_init(struct drm_device *dev); int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe); int i915_scheduler_handle_irq(struct intel_engine_cs *ring); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 83a5254..df0cd48 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -32,6 +32,7 @@ #include drm/i915_drm.h #include i915_trace.h #include intel_drv.h +#include i915_scheduler.h bool intel_ring_initialized(struct intel_engine_cs *ring) @@ -1379,6 +1380,9 @@ gen6_ring_sync(struct drm_i915_gem_request *waiter_req, u32 wait_mbox = signaller-semaphore.mbox.wait[waiter-id]; int ret; + /* Arithmetic on sequence numbers is unreliable with a scheduler. */ + BUG_ON(i915_scheduler_is_enabled(signaller-dev)); + /* Throughout all of the GEM code, seqno passed implies our current * seqno is = the last seqno executed. 
However for hardware the * comparison is strictly greater than. -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 27/39] drm/i915: Add sync wait support to scheduler
From: John Harrison john.c.harri...@intel.com There is a sync framework to allow work for multiple independent systems to be synchronised with each other but without stalling the CPU whether in the application or the driver. This patch adds support for this framework to the GPU scheduler. Batch buffers can now have sync framework fence objects associated with them. The scheduler will look at this fence when deciding what to submit next to the hardware. If the fence is outstanding then that batch buffer will be passed over in preference of one that is ready to run. If no other batches are ready then the scheduler will queue an asynchronous callback to be woken up when the fence has been signalled. The callback will wake the scheduler and submit the now ready batch buffer. For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_scheduler.c | 163 -- drivers/gpu/drm/i915/i915_scheduler.h | 6 ++ 3 files changed, 165 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 12b4986..b568432 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1703,6 +1703,7 @@ struct i915_execbuffer_params { uint32_tbatch_obj_vm_offset; struct intel_engine_cs *ring; struct drm_i915_gem_object *batch_obj; + struct sync_fence *fence_wait; struct drm_clip_rect*cliprects; uint32_tinstp_mask; int instp_mode; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index c7139f8..19577c9 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -25,6 +25,7 @@ #include i915_drv.h #include intel_drv.h #include i915_scheduler.h +#include ../drivers/staging/android/sync.h static int i915_scheduler_fly_node(struct i915_scheduler_queue_entry *node); static int i915_scheduler_remove_dependent(struct i915_scheduler *scheduler, @@ -100,6 +101,9 @@ int 
i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) qe-scheduler_index = scheduler-index++; + WARN_ON(qe-params.fence_wait + (atomic_read(qe-params.fence_wait-status) == 0)); + intel_ring_reserved_space_cancel(qe-params.request-ringbuf); scheduler-flags[qe-params.ring-id] |= i915_sf_submitting; @@ -134,6 +138,11 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) if (qe-params.dispatch_flags I915_DISPATCH_SECURE) i915_gem_execbuff_release_batch_obj(qe-params.batch_obj); +#ifdef CONFIG_SYNC + if (qe-params.fence_wait) + sync_fence_put(qe-params.fence_wait); +#endif + return 0; } @@ -625,6 +634,11 @@ static int i915_scheduler_remove(struct intel_engine_cs *ring) node = list_first_entry(remove, typeof(*node), link); list_del(node-link); +#ifdef CONFIG_SYNC + if (node-params.fence_wait) + sync_fence_put(node-params.fence_wait); +#endif + /* Free up all the DRM object references */ i915_gem_scheduler_clean_node(node); @@ -845,17 +859,100 @@ static int i915_scheduler_submit_max_priority(struct intel_engine_cs *ring, return count; } +#ifdef CONFIG_SYNC +/* Use a private structure in order to pass the 'dev' pointer through */ +struct i915_sync_fence_waiter { + struct sync_fence_waiter sfw; + struct drm_device*dev; + struct i915_scheduler_queue_entry *node; +}; + +static void i915_scheduler_wait_fence_signaled(struct sync_fence *fence, + struct sync_fence_waiter *waiter) +{ + struct i915_sync_fence_waiter *i915_waiter; + struct drm_i915_private *dev_priv = NULL; + + i915_waiter = container_of(waiter, struct i915_sync_fence_waiter, sfw); + dev_priv= (i915_waiter i915_waiter-dev) ? + i915_waiter-dev-dev_private : NULL; + + /* +* NB: The callback is executed at interrupt time, thus it can not +* call _submit() directly. It must go via the delayed work handler. 
+*/ + if (dev_priv) { + struct i915_scheduler *scheduler; + unsigned long flags; + + scheduler = dev_priv-scheduler; + + spin_lock_irqsave(scheduler-lock, flags); + i915_waiter-node-flags = ~i915_qef_fence_waiting; + spin_unlock_irqrestore(scheduler-lock, flags); + + queue_work(dev_priv-wq, dev_priv-mm.scheduler_work); + } + +
[Intel-gfx] [RFC 32/39] drm/i915: Added debug state dump facilities to scheduler
From: John Harrison john.c.harri...@intel.com When debugging batch buffer submission issues, it is useful to be able to see what the current state of the scheduler is. This change adds functions for decoding the internal scheduler state and reporting it. Change-Id: I0634168e3f3465ff023f5a673165c90b07e535b6 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_scheduler.c | 276 ++ drivers/gpu/drm/i915/i915_scheduler.h | 14 ++ 2 files changed, 290 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index f0c99ad..e22f6b8 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -36,6 +36,9 @@ static int i915_scheduler_submit_max_priority(struct intel_engine_cs *ri bool is_locked); static uint32_ti915_scheduler_count_flying(struct i915_scheduler *scheduler, struct intel_engine_cs *ring); +static int i915_scheduler_dump_locked(struct intel_engine_cs *ring, + const char *msg); +static int i915_scheduler_dump_all_locked(struct drm_device *dev, const char *msg); static voidi915_scheduler_priority_bump_clear(struct i915_scheduler *scheduler); static int i915_scheduler_priority_bump(struct i915_scheduler *scheduler, struct i915_scheduler_queue_entry *target, @@ -53,6 +56,115 @@ bool i915_scheduler_is_enabled(struct drm_device *dev) return dev_priv-scheduler != NULL; } +const char *i915_qe_state_str(struct i915_scheduler_queue_entry *node) +{ + static char str[50]; + char*ptr = str; + + *(ptr++) = node-bumped ? 
'B' : '-', + + *ptr = 0; + + return str; +} + +char i915_scheduler_queue_status_chr(enum i915_scheduler_queue_status status) +{ + switch (status) { + case i915_sqs_none: + return 'N'; + + case i915_sqs_queued: + return 'Q'; + + case i915_sqs_popped: + return 'X'; + + case i915_sqs_flying: + return 'F'; + + case i915_sqs_complete: + return 'C'; + + case i915_sqs_dead: + return 'D'; + + default: + break; + } + + return '?'; +} + +const char *i915_scheduler_queue_status_str( + enum i915_scheduler_queue_status status) +{ + static char str[50]; + + switch (status) { + case i915_sqs_none: + return None; + + case i915_sqs_queued: + return Queued; + + case i915_sqs_popped: + return Popped; + + case i915_sqs_flying: + return Flying; + + case i915_sqs_complete: + return Complete; + + case i915_sqs_dead: + return Dead; + + default: + break; + } + + sprintf(str, [Unknown_%d!], status); + return str; +} + +const char *i915_scheduler_flag_str(uint32_t flags) +{ + static char str[100]; + char *ptr = str; + + *ptr = 0; + +#define TEST_FLAG(flag, msg) \ + do {\ + if (flags (flag)) { \ + strcpy(ptr, msg); \ + ptr += strlen(ptr); \ + flags = ~(flag); \ + } \ + } while (0) + + TEST_FLAG(i915_sf_interrupts_enabled, IntOn|); + TEST_FLAG(i915_sf_submitting, Submitting|); + TEST_FLAG(i915_sf_dump_force, DumpForce|); + TEST_FLAG(i915_sf_dump_details, DumpDetails|); + TEST_FLAG(i915_sf_dump_dependencies, DumpDeps|); + +#undef TEST_FLAG + + if (flags) { + sprintf(ptr, Unknown_0x%X!, flags); + ptr += strlen(ptr); + } + + if (ptr == str) + strcpy(str, -); + else + ptr[-1] = 0; + + return str; +}; + int i915_scheduler_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev-dev_private; @@ -709,6 +821,170 @@ void i915_gem_scheduler_work_handler(struct work_struct *work) mutex_unlock(dev-struct_mutex); } +int i915_scheduler_dump_all(struct drm_device *dev, const char *msg) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + struct i915_scheduler *scheduler = 
dev_priv-scheduler; + unsigned long flags; + int ret; + + spin_lock_irqsave(scheduler-lock, flags); + ret = i915_scheduler_dump_all_locked(dev, msg); + spin_unlock_irqrestore(scheduler-lock, flags); + + return ret; +} +
[Intel-gfx] [RFC 24/39] drm/i915: Support for 'unflushed' ring idle
From: John Harrison john.c.harri...@intel.com When the seqno wraps around zero, the entire GPU is forced to be idle for some reason (possibly only to work around issues with hardware semaphores but no-one seems too sure!). This causes a problem if the force idle occurs at an inopportune moment such as in the middle of submitting a batch buffer. Specifically, it would lead to recursive submits - submitting work requires a new seqno, the new seqno requires idling the ring, idling the ring requires submitting work, submitting work requires a new seqno... This change adds a 'flush' parameter to the idle function call which specifies whether the scheduler queues should be flushed out. I.e. is the call intended to just idle the ring as it is right now (no flush) or is it intended to force all outstanding work out of the system (with flush). In the seqno wrap case, pending work is not an issue because the next operation will be to submit it. However, in other cases, the intention is to make sure everything that could be done has been done. 
Change-Id: I182e9a5853666c64ecc9e84d8a8b820a7f8e8836 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/intel_lrc.c| 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 17 +++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6d72caa..20c696f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2474,7 +2474,7 @@ i915_gem_init_seqno(struct drm_device *dev, u32 seqno) /* Carefully retire all requests without writing to the rings */ for_each_ring(ring, dev_priv, i) { - ret = intel_ring_idle(ring); + ret = intel_ring_idle(ring, false); if (ret) return ret; } @@ -3732,7 +3732,7 @@ int i915_gpu_idle(struct drm_device *dev) i915_add_request_no_flush(req); } - ret = intel_ring_idle(ring); + ret = intel_ring_idle(ring, true); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 76d5023..a811d0b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -990,7 +990,7 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) if (!intel_ring_initialized(ring)) return; - ret = intel_ring_idle(ring); + ret = intel_ring_idle(ring, true); if (ret !i915_reset_in_progress(to_i915(ring-dev)-gpu_error)) DRM_ERROR(failed to quiesce %s whilst cleaning up: %d\n, ring-name, ret); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e0992b7..afb04de 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2177,9 +2177,22 @@ static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf) intel_ring_update_space(ringbuf); } -int intel_ring_idle(struct intel_engine_cs *ring) +int intel_ring_idle(struct intel_engine_cs *ring, bool flush) { struct drm_i915_gem_request 
*req; + int ret; + + /* +* NB: Must not flush the scheduler if this idle request is from +* within an execbuff submission (i.e. due to 'get_seqno' calling +* 'wrap_seqno' calling 'idle'). As that would lead to recursive +* flushes! +*/ + if (flush) { + ret = i915_scheduler_flush(ring, true); + if (ret) + return ret; + } /* Wait upon the last request to be completed */ if (list_empty(ring-request_list)) @@ -2983,7 +2996,7 @@ intel_stop_ring_buffer(struct intel_engine_cs *ring) if (!intel_ring_initialized(ring)) return; - ret = intel_ring_idle(ring); + ret = intel_ring_idle(ring, true); if (ret !i915_reset_in_progress(to_i915(ring-dev)-gpu_error)) DRM_ERROR(failed to quiesce %s whilst cleaning up: %d\n, ring-name, ret); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9457774..2f30900 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -487,7 +487,7 @@ void intel_ring_update_space(struct intel_ringbuffer *ringbuf); int intel_ring_space(struct intel_ringbuffer *ringbuf); bool intel_ring_stopped(struct intel_engine_cs *ring); -int __must_check intel_ring_idle(struct intel_engine_cs *ring); +int __must_check intel_ring_idle(struct intel_engine_cs *ring, bool flush); void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno); int
[Intel-gfx] [RFC 36/39] drm/i915: Add scheduler support functions for TDR
From: John Harrison john.c.harri...@intel.com Change-Id: I720463f01c4edd3579ce52e315a85e4d7874d7e5 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_scheduler.c | 31 +++ drivers/gpu/drm/i915/i915_scheduler.h | 1 + 2 files changed, 32 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 7be1c89..631f4e6 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -1693,6 +1693,37 @@ int i915_scheduler_closefile(struct drm_device *dev, struct drm_file *file) return 0; } +/* + * Used by TDR to distinguish hung rings (not moving but with work to do) + * from idle rings (not moving because there is nothing to do). + */ +bool i915_scheduler_is_ring_flying(struct intel_engine_cs *ring) +{ + struct drm_i915_private *dev_priv = ring-dev-dev_private; + struct i915_scheduler *scheduler = dev_priv-scheduler; + struct i915_scheduler_queue_entry *node; + unsigned long flags; + boolfound = false; + + /* With the scheduler in bypass mode, no information can be returned. 
*/ + if (i915.scheduler_override i915_so_direct_submit) { + return true; + } + + spin_lock_irqsave(scheduler-lock, flags); + + list_for_each_entry(node, scheduler-node_queue[ring-id], link) { + if (I915_SQS_IS_FLYING(node)) { + found = true; + break; + } + } + + spin_unlock_irqrestore(scheduler-lock, flags); + + return found; +} + bool i915_scheduler_file_queue_is_full(struct drm_file *file) { struct drm_i915_file_private *file_priv = file-driver_priv; diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 6e6e3a0..2113e7d 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -154,6 +154,7 @@ int i915_scheduler_closefile(struct drm_device *dev, voidi915_gem_scheduler_clean_node(struct i915_scheduler_queue_entry *node); int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe); int i915_scheduler_handle_irq(struct intel_engine_cs *ring); +booli915_scheduler_is_ring_flying(struct intel_engine_cs *ring); voidi915_scheduler_kill_all(struct drm_device *dev); voidi915_gem_scheduler_work_handler(struct work_struct *work); #ifdef CONFIG_SYNC -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 33/39] drm/i915: Add early exit to execbuff_final() if insufficient ring space
From: John Harrison john.c.harri...@intel.com One of the major purposes of the GPU scheduler is to avoid stalling the CPU when the GPU is busy and unable to accept more work. This change adds support to the ring submission code to allow a ring space check to be performed before attempting to submit a batch buffer to the hardware. If insufficient space is available then the scheduler can go away and come back later, letting the CPU get on with other work, rather than stalling and waiting for the hardware to catch up. Change-Id: I267159ce1150cb6714d34a49b841bcbe4bf66326 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 42 -- drivers/gpu/drm/i915/intel_lrc.c | 57 +++--- drivers/gpu/drm/i915/intel_ringbuffer.c| 24 + drivers/gpu/drm/i915/intel_ringbuffer.h| 1 + 4 files changed, 109 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c2a69d8..b701838 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1078,25 +1078,19 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, { struct intel_engine_cs *ring = req-ring; struct drm_i915_private *dev_priv = dev-dev_private; - int ret, i; + int i; if (!IS_GEN7(dev) || ring != dev_priv-ring[RCS]) { DRM_DEBUG(sol reset is gen7/rcs only\n); return -EINVAL; } - ret = intel_ring_begin(req, 4 * 3); - if (ret) - return ret; - for (i = 0; i 4; i++) { intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i)); intel_ring_emit(ring, 0); } - intel_ring_advance(ring); - return 0; } @@ -1315,6 +1309,7 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) struct intel_engine_cs *ring = params-ring; u64 exec_start, exec_len; int ret, i; + uint32_t min_space; /* The mutex must be acquired before calling this function */ BUG_ON(!mutex_is_locked(params-dev-struct_mutex)); @@ 
-1336,8 +1331,36 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) if (ret) return ret; + /* +* It would be a bad idea to run out of space while writing commands +* to the ring. One of the major aims of the scheduler is to not stall +* at any point for any reason. However, doing an early exit half way +* through submission could result in a partial sequence being written +* which would leave the engine in an unknown state. Therefore, check in +* advance that there will be enough space for the entire submission +* whether emitted by the code below OR by any other functions that may +* be executed before the end of final(). +* +* NB: This test deliberately overestimates, because that's easier than +* tracing every potential path that could be taken! +* +* Current measurements suggest that we may need to emit up to 744 bytes +* (186 dwords), so this is rounded up to 256 dwords here. Then we double +* that to get the free space requirement, because the block isn't allowed +* to span the transition from the end to the beginning of the ring. +*/ +#define I915_BATCH_EXEC_MAX_LEN 256/* max dwords emitted here */ + min_space = I915_BATCH_EXEC_MAX_LEN * 2 * sizeof(uint32_t); + ret = intel_ring_test_space(params-request-ringbuf, min_space); + if (ret) + goto early_error; + intel_runtime_pm_get(dev_priv); + ret = intel_ring_begin(params-request, I915_BATCH_EXEC_MAX_LEN); + if (ret) + goto error; + /* * Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. 
@@ -1356,10 +1379,6 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) if (ring == dev_priv-ring[RCS] params-instp_mode != dev_priv-relative_constants_mode) { - ret = intel_ring_begin(params-request, 4); - if (ret) - goto error; - intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(ring, INSTPM); @@ -1411,6 +1430,7 @@ error: */ intel_runtime_pm_put(dev_priv); +early_error: if (ret) intel_ring_reserved_space_cancel(params-request-ringbuf); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 41dca2a..3cedddb 100644 ---
[Intel-gfx] [RFC 08/39] drm/i915: Prepare retire_requests to handle out-of-order seqnos
From: John Harrison john.c.harri...@intel.com A major point of the GPU scheduler is that it re-orders batch buffers after they have been submitted to the driver. This leads to requests completing out of order. In turn, this means that the retire processing can no longer assume that all completed entries are at the front of the list. Rather than attempting to re-order the request list on a regular basis, it is better to simply scan the entire list. There is also a problem with doing the free of the request before the move to inactive. Thus the requests are now moved to a temporary list first, then the objects de-activated and finally the requests on the temporary list are freed. Change-Id: I7eb6793581d9d28eb832e0e94c116b7202fa1b26 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem.c | 54 +++-- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3fbc6ec..56405cd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3171,6 +3171,10 @@ void i915_gem_reset(struct drm_device *dev) void i915_gem_retire_requests_ring(struct intel_engine_cs *ring) { + struct drm_i915_gem_object *obj, *obj_next; + struct drm_i915_gem_request *req, *req_next; + LIST_HEAD(deferred_request_free); + WARN_ON(i915_verify_lists(ring-dev)); /* @@ -3180,37 +3184,31 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) */ i915_gem_request_notify(ring); + /* +* Note that request entries might be out of order due to rescheduling +* and pre-emption. Thus both lists must be processed in their entirety +* rather than stopping at the first non-complete entry. +*/ + /* Retire requests first as we use it above for the early return. * If we retire requests last, we may use a later seqno and so clear * the requests lists without clearing the active list, leading to * confusion. 
*/ - while (!list_empty(ring-request_list)) { - struct drm_i915_gem_request *request; - - request = list_first_entry(ring-request_list, - struct drm_i915_gem_request, - list); - - if (!i915_gem_request_completed(request)) - break; + list_for_each_entry_safe(req, req_next, ring-request_list, list) { + if (!i915_gem_request_completed(req)) + continue; - i915_gem_request_retire(request); + list_move_tail(req-list, deferred_request_free); } /* Move any buffers on the active list that are no longer referenced * by the ringbuffer to the flushing/inactive lists as appropriate, * before we free the context associated with the requests. */ - while (!list_empty(ring-active_list)) { - struct drm_i915_gem_object *obj; - - obj = list_first_entry(ring-active_list, - struct drm_i915_gem_object, - ring_list[ring-id]); - + list_for_each_entry_safe(obj, obj_next, ring-active_list, ring_list[ring-id]) { if (!list_empty(obj-last_read_req[ring-id]-list)) - break; + continue; i915_gem_object_retire__read(obj, ring-id); } @@ -3222,18 +3220,26 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) } while (!list_empty(ring-delayed_free_list)) { - struct drm_i915_gem_request *request; unsigned long flags; - request = list_first_entry(ring-delayed_free_list, - struct drm_i915_gem_request, - delay_free_list); + req = list_first_entry(ring-delayed_free_list, + struct drm_i915_gem_request, + delay_free_list); spin_lock_irqsave(ring-delayed_free_lock, flags); - list_del(request-delay_free_list); + list_del(req-delay_free_list); spin_unlock_irqrestore(ring-delayed_free_lock, flags); - i915_gem_request_free(request); + i915_gem_request_free(req); + } + + /* It should now be safe to actually free the requests */ + while (!list_empty(deferred_request_free)) { + req = list_first_entry(deferred_request_free, + struct drm_i915_gem_request, + list); + + i915_gem_request_retire(req); } WARN_ON(i915_verify_lists(ring-dev)); -- 1.9.1
[Intel-gfx] [RFC 34/39] drm/i915: Added scheduler statistic reporting to debugfs
From: John Harrison john.c.harri...@intel.com It is useful to know what the scheduler is doing for both debugging and performance analysis purposes. This change adds a bunch of counters and such that keep track of various scheduler operations (batches submitted, completed, flush requests, etc.). The data can then be read in userland via the debugfs mechanism. Change-Id: I3266c631cd70c9eeb2c235f88f493e60462f85d7 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c| 76 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 - drivers/gpu/drm/i915/i915_scheduler.c | 71 ++-- drivers/gpu/drm/i915/i915_scheduler.h | 35 ++ 4 files changed, 189 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 028fa8f..3c5c750 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3258,6 +3258,81 @@ static int i915_drrs_status(struct seq_file *m, void *unused) return 0; } +static int i915_scheduler_info(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *) m-private; + struct drm_device *dev = node-minor-dev; + struct drm_i915_private *dev_priv = dev-dev_private; + struct i915_scheduler *scheduler = dev_priv-scheduler; + struct i915_scheduler_stats *stats = scheduler-stats; + struct i915_scheduler_stats_nodes node_stats[I915_NUM_RINGS]; + struct intel_engine_cs *ring; + char str[50 * (I915_NUM_RINGS + 1)], name[50], *ptr; + int ret, i, r; + + ret = mutex_lock_interruptible(dev-mode_config.mutex); + if (ret) + return ret; + +#define PRINT_VAR(name, fmt, var) \ + do {\ + sprintf(str, %-22s, name);\ + ptr = str + strlen(str);\ + for_each_ring(ring, dev_priv, r) { \ + sprintf(ptr, %10 fmt, var); \ + ptr += strlen(ptr); \ + } \ + seq_printf(m, %s\n, str); \ + } while (0) + + PRINT_VAR(Ring name:, s, dev_priv-ring[r].name); + PRINT_VAR( Ring seqno, d, ring-get_seqno(ring, false)); + seq_putc(m, 
'\n'); + + seq_puts(m, Batch submissions:\n); + PRINT_VAR( Queued, u, stats[r].queued); + PRINT_VAR( Submitted,u, stats[r].submitted); + PRINT_VAR( Completed,u, stats[r].completed); + PRINT_VAR( Expired, u, stats[r].expired); + seq_putc(m, '\n'); + + seq_puts(m, Flush counts:\n); + PRINT_VAR( By object,u, stats[r].flush_obj); + PRINT_VAR( By request, u, stats[r].flush_req); + PRINT_VAR( Blanket, u, stats[r].flush_all); + PRINT_VAR( Entries bumped, u, stats[r].flush_bump); + PRINT_VAR( Entries submitted,u, stats[r].flush_submit); + seq_putc(m, '\n'); + + seq_puts(m, Miscellaneous:\n); + PRINT_VAR( ExecEarly retry, u, stats[r].exec_early); + PRINT_VAR( ExecFinal requeue,u, stats[r].exec_again); + PRINT_VAR( ExecFinal killed, u, stats[r].exec_dead); + PRINT_VAR( Fence wait, u, stats[r].fence_wait); + PRINT_VAR( Fence wait again, u, stats[r].fence_again); + PRINT_VAR( Fence wait ignore,u, stats[r].fence_ignore); + PRINT_VAR( Fence supplied, u, stats[r].fence_got); + PRINT_VAR( Hung flying, u, stats[r].kill_flying); + PRINT_VAR( Hung queued, u, stats[r].kill_queued); + seq_putc(m, '\n'); + + seq_puts(m, Queue contents:\n); + for_each_ring(ring, dev_priv, i) + i915_scheduler_query_stats(ring, node_stats + ring-id); + + for (i = 0; i (i915_sqs_MAX + 1); i++) { + sprintf(name, %s, i915_scheduler_queue_status_str(i)); + PRINT_VAR(name, d, node_stats[r].counts[i]); + } + seq_putc(m, '\n'); + +#undef PRINT_VAR + + mutex_unlock(dev-mode_config.mutex); + + return 0; +} + struct pipe_crc_info { const char *name; struct drm_device *dev; @@ -5250,6 +5325,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {i915_semaphore_status, i915_semaphore_status, 0}, {i915_shared_dplls_info, i915_shared_dplls_info, 0}, {i915_dp_mst_info, i915_dp_mst_info, 0}, + {i915_scheduler_info, i915_scheduler_info, 0}, {i915_wa_registers, i915_wa_registers, 0},
[Intel-gfx] [RFC 28/39] drm/i915: Connecting execbuff fences to scheduler
From: John Harrison john.c.harri...@intel.com The scheduler now supports sync framework fences being associated with batch buffers. The execbuff IOCTL allows such fences to be passed in from user land. This patch wires the two together so that the IOCTL no longer needs to stall on the fence immediately. Instead the stall is now swallowed by the scheduler's scheduling algorithm. For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 21 - drivers/gpu/drm/i915/i915_scheduler.c | 3 +++ drivers/gpu/drm/i915/i915_scheduler.h | 5 + 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1642701..1325b19 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1612,7 +1612,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* * Without a GPU scheduler, any fence waits must be done up front. */ - if (args-flags I915_EXEC_WAIT_FENCE) { + if ((args-flags I915_EXEC_WAIT_FENCE) + (i915.scheduler_override i915_so_direct_submit)) + { ret = i915_early_fence_wait(ring, fd_fence_wait); if (ret 0) return ret; @@ -1799,6 +1801,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params-ctx = ctx; #ifdef CONFIG_SYNC + if (args-flags I915_EXEC_WAIT_FENCE) { + if (fd_fence_wait 0) { + DRM_ERROR(Wait fence for ring %d has invalid id %d\n, + (int) ring-id, fd_fence_wait); + } else { + params-fence_wait = sync_fence_fdget(fd_fence_wait); + if (params-fence_wait == NULL) + DRM_ERROR(Invalid wait fence %d\n, + fd_fence_wait); + } + } + if (args-flags I915_EXEC_CREATE_FENCE) { /* * Caller has requested a sync fence. 
@@ -1865,6 +1879,11 @@ err: i915_gem_context_unreference(params-ctx); } +#ifdef CONFIG_SYNC + if (params-fence_wait) + sync_fence_put(params-fence_wait); +#endif + /* * If the request was created but not successfully submitted then it * must be freed again. If it was submitted then it is being tracked diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 19577c9..66dbc20 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -977,6 +977,9 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring, signalled = atomic_read(node-params.fence_wait-status) != 0; else signalled = true; + + if (!signalled) + signalled = i915_safe_to_ignore_fence(ring, node-params.fence_wait); #endif // CONFIG_SYNC has_local = false; diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 8ca4b4b..3f94512 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -110,6 +110,11 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *q int i915_scheduler_handle_irq(struct intel_engine_cs *ring); voidi915_scheduler_kill_all(struct drm_device *dev); voidi915_gem_scheduler_work_handler(struct work_struct *work); +#ifdef CONFIG_SYNC +struct drm_i915_gem_request *i915_scheduler_find_by_sync_value(struct intel_engine_cs *ring, + struct intel_context *ctx, + uint32_t sync_value); +#endif int i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked); int i915_scheduler_flush_request(struct drm_i915_gem_request *req, bool is_locked); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 22/39] drm/i915: Add scheduler hook to GPU reset
From: John Harrison john.c.harri...@intel.com When the watchdog resets the GPU, the scheduler needs to know so that it can clean up it's view of the world. All in flight nodes must be marked as dead so that the scheduler does not wait forever for them to complete. Also, all queued nodes must be marked as dead so that the scheduler does not dead lock the reset code by saying that the ring can not be idled and it must come back again later. Change-Id: I184eb59c5c1a1385f9c17db66c7cc46f8904eebd For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ drivers/gpu/drm/i915/i915_scheduler.c | 63 --- drivers/gpu/drm/i915/i915_scheduler.h | 8 - 3 files changed, 68 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6142e68..6d72caa 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3187,6 +3187,8 @@ void i915_gem_reset(struct drm_device *dev) struct intel_engine_cs *ring; int i; + i915_scheduler_kill_all(dev); + /* * Before we free the objects from the requests, we need to inspect * them for finding the guilty party. As the requests only borrow diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 73c9ba6..3155f42 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -341,6 +341,56 @@ static void i915_scheduler_node_kill(struct i915_scheduler_queue_entry *node) node-status = i915_sqs_dead; } +/* Abandon a queued node completely. For example because the driver is being + * reset and it is not valid to preserve absolutely any state at all across the + * reinitialisation sequence. */ +static void i915_scheduler_node_kill_queued(struct i915_scheduler_queue_entry *node) +{ + BUG_ON(!node); + BUG_ON(!I915_SQS_IS_QUEUED(node)); + + node-status = i915_sqs_dead; +} + +/* The system is toast. Terminate all nodes with extreme prejudice. 
*/ +void i915_scheduler_kill_all(struct drm_device *dev) +{ + struct i915_scheduler_queue_entry *node; + struct drm_i915_private *dev_priv = dev-dev_private; + struct i915_scheduler *scheduler = dev_priv-scheduler; + unsigned long flags; + int r; + + spin_lock_irqsave(scheduler-lock, flags); + + for (r = 0; r I915_NUM_RINGS; r++) { + list_for_each_entry(node, scheduler-node_queue[r], link) { + switch (node-status) { + case I915_SQS_CASE_COMPLETE: + break; + + case I915_SQS_CASE_FLYING: + i915_scheduler_node_kill(node); + break; + + case I915_SQS_CASE_QUEUED: + i915_scheduler_node_kill_queued(node); + break; + + default: + /* Wot no state?! */ + BUG(); + } + } + } + + memset(scheduler-last_irq_seqno, 0x00, sizeof(scheduler-last_irq_seqno)); + + spin_unlock_irqrestore(scheduler-lock, flags); + + queue_work(dev_priv-wq, dev_priv-mm.scheduler_work); +} + /* * The batch tagged with the indicated seqence number has completed. * Search the queue for it, update its status and those of any batches @@ -912,7 +962,7 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked) scheduler-flags[ring-id] = ~i915_sf_submitting; if (ret) { - bool requeue = true; + int requeue = 1; /* Oh dear! Either the node is broken or the ring is * busy. So need to kill the node or requeue it and try @@ -922,7 +972,7 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked) case ENODEV: case ENOENT: /* Fatal errors. Kill the node. */ - requeue = false; + requeue = -1; break; case EAGAIN: @@ -941,13 +991,18 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked) break; } - if (requeue) { + /* Check that the watchdog/reset code has not nuked +* the node while we weren't looking: */ + if (node-status == i915_sqs_dead) + requeue = 0; + + if (requeue == 1) { i915_scheduler_node_requeue(node);
[Intel-gfx] [RFC 11/39] drm/i915: Force MMIO flips when scheduler enabled
From: John Harrison john.c.harri...@intel.com Change-Id: Ice071af6d88306b0d1c53bdb651a1a3e20bdc1af For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b9e8113..9629dab 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -44,6 +44,7 @@ #include drm/drm_plane_helper.h #include drm/drm_rect.h #include linux/dma_remapping.h +#include i915_scheduler.h /* Primary plane formats for gen = 3 */ static const uint32_t i8xx_primary_formats[] = { @@ -11180,6 +11181,8 @@ static bool use_mmio_flip(struct intel_engine_cs *ring, return true; else if (i915.enable_execlists) return true; + else if (i915_scheduler_is_enabled(ring-dev)) + return true; else return ring != i915_gem_request_get_ring(obj-last_write_req); } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 02/39] drm/i915: Updating assorted register and status page definitions
From: Dave Gordon david.s.gor...@intel.com Added various definitions that will be useful for the scheduler in general and pre-emptive context switching in particular. Change-Id: Ica805b94160426def51f5d520f5ce51c60864a98 For: VIZ-1587 Signed-off-by: Dave Gordon david.s.gor...@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 30 - drivers/gpu/drm/i915/intel_ringbuffer.h | 40 +++-- 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e9a95df..ae3e9f7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -250,6 +250,10 @@ #define MI_GLOBAL_GTT(122) #define MI_NOOPMI_INSTR(0, 0) +#define MI_NOOP_WRITE_ID (122) +#define MI_NOOP_ID_MASK ((122) - 1) +#define MI_NOOP_MID(id) ((id) MI_NOOP_ID_MASK) +#define MI_NOOP_WITH_ID(id)MI_INSTR(0, MI_NOOP_WRITE_ID|MI_NOOP_MID(id)) #define MI_USER_INTERRUPT MI_INSTR(0x02, 0) #define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0) #define MI_WAIT_FOR_OVERLAY_FLIP (116) @@ -267,6 +271,7 @@ #define MI_ARB_ON_OFF MI_INSTR(0x08, 0) #define MI_ARB_ENABLE(10) #define MI_ARB_DISABLE (00) +#define MI_ARB_CHECK MI_INSTR(0x05, 0) #define MI_BATCH_BUFFER_ENDMI_INSTR(0x0a, 0) #define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0) #define MI_SUSPEND_FLUSH_EN (10) @@ -316,6 +321,8 @@ #define MI_SEMAPHORE_SYNC_INVALID (316) #define MI_SEMAPHORE_SYNC_MASK(316) #define MI_SET_CONTEXT MI_INSTR(0x18, 0) +#define MI_CONTEXT_ADDR_MASK ((~0)12) +#define MI_SET_CONTEXT_FLAG_MASK ((112)-1) #define MI_MM_SPACE_GTT (18) #define MI_MM_SPACE_PHYSICAL (08) #define MI_SAVE_EXT_STATE_EN (13) @@ -335,6 +342,10 @@ #define MI_USE_GGTT (1 22) /* g4x+ */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) #define MI_STORE_DWORD_INDEX_SHIFT 2 +#define MI_STORE_REG_MEM MI_INSTR(0x24, 1) +#define MI_STORE_REG_MEM_GTT (1 22) +#define MI_STORE_REG_MEM_PREDICATE (1 21) + /* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: * - Always issue a MI_NOOP _before_ the 
MI_LOAD_REGISTER_IMM - otherwise hw * simply ignores the register load under certain conditions. @@ -349,7 +360,10 @@ #define MI_FLUSH_DWMI_INSTR(0x26, 1) /* for GEN6 */ #define MI_FLUSH_DW_STORE_INDEX (121) #define MI_INVALIDATE_TLB(118) +#define MI_FLUSH_DW_OP_NONE (014) #define MI_FLUSH_DW_OP_STOREDW (114) +#define MI_FLUSH_DW_OP_RSVD (214) +#define MI_FLUSH_DW_OP_STAMP (314) #define MI_FLUSH_DW_OP_MASK (314) #define MI_FLUSH_DW_NOTIFY (18) #define MI_INVALIDATE_BSD(17) @@ -1491,6 +1505,19 @@ enum skl_disp_power_wells { #define HSW_GTT_CACHE_EN 0x4024 #define GTT_CACHE_EN_ALL 0xF0007FFF + +/* + * Premption-related registers + */ +#define RING_UHPTR(base) ((base)+0x134) +#define UHPTR_GFX_ADDR_ALIGN (0x7) +#define UHPTR_VALID (0x1) +#define RING_PREEMPT_ADDR 0x0214c +#define PREEMPT_BATCH_LEVEL_MASK (0x3) +#define BB_PREEMPT_ADDR0x02148 +#define SBB_PREEMPT_ADDR 0x0213c +#define RS_PREEMPT_STATUS 0x0215c + #define GEN7_WR_WATERMARK 0x4028 #define GEN7_GFX_PRIO_CTRL 0x402C #define ARB_MODE 0x4030 @@ -6612,7 +6639,8 @@ enum skl_disp_power_wells { #define VLV_SPAREG2H 0xA194 #define GTFIFODBG 0x12 -#defineGT_FIFO_SBDROPERR (16) +#defineGT_FIFO_CPU_ERROR_MASK 0xf +#defineGT_FIFO_SDDROPERR (16) #defineGT_FIFO_BLOBDROPERR (15) #defineGT_FIFO_SB_READ_ABORTERR(14) #defineGT_FIFO_DROPERR (13) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2e68b73..9457774 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -49,6 +49,12 @@ struct intel_hw_status_page { #define I915_READ_MODE(ring) I915_READ(RING_MI_MODE((ring)-mmio_base)) #define I915_WRITE_MODE(ring, val) I915_WRITE(RING_MI_MODE((ring)-mmio_base), val) +#define I915_READ_UHPTR(ring) \ + I915_READ(RING_UHPTR((ring)-mmio_base)) +#define I915_WRITE_UHPTR(ring, val) \ + I915_WRITE(RING_UHPTR((ring)-mmio_base), val) +#define I915_READ_NOPID(ring) I915_READ(RING_NOPID((ring)-mmio_base)) + /* seqno size is actually only a 
uint32, but since we plan to use MI_FLUSH_DW to * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. */ @@ -415,10 +421,40 @@ intel_write_status_page(struct
[Intel-gfx] [RFC 21/39] drm/i915: Added scheduler flush calls to ring throttle and idle functions
From: John Harrison john.c.harri...@intel.com When requesting that all GPU work is completed, it is now necessary to get the scheduler involved in order to flush out work that queued and not yet submitted. Change-Id: I95dcc2a2ee5c1a844748621c333994ddd6cf6a66 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem.c | 17 - drivers/gpu/drm/i915/i915_scheduler.c | 45 +++ drivers/gpu/drm/i915/i915_scheduler.h | 1 + 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index dd9ebbe..6142e68 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3710,6 +3710,10 @@ int i915_gpu_idle(struct drm_device *dev) /* Flush everything onto the inactive list. */ for_each_ring(ring, dev_priv, i) { + ret = i915_scheduler_flush(ring, true); + if (ret 0) + return ret; + if (!i915.enable_execlists) { struct drm_i915_gem_request *req; @@ -4679,7 +4683,8 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; struct drm_i915_gem_request *request, *target = NULL; unsigned reset_counter; - int ret; + int i, ret; + struct intel_engine_cs *ring; ret = i915_gem_wait_for_error(dev_priv-gpu_error); if (ret) @@ -4689,6 +4694,16 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) if (ret) return ret; + for_each_ring(ring, dev_priv, i) { + /* Need a mechanism to flush out scheduler entries that were +* submitted more than 'recent_enough' time ago as well! In the +* meantime, just flush everything out to ensure that entries +* can not sit around indefinitely. 
*/ + ret = i915_scheduler_flush(ring, false); + if (ret 0) + return ret; + } + spin_lock(file_priv-mm.lock); list_for_each_entry(request, file_priv-mm.request_list, client_list) { if (time_after_eq(request-emitted_jiffies, recent_enough)) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 811cbe4..73c9ba6 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -653,6 +653,51 @@ int i915_scheduler_flush_request(struct drm_i915_gem_request *req, return flush_count; } +int i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked) +{ + struct i915_scheduler_queue_entry *node; + struct drm_i915_private *dev_priv; + struct i915_scheduler *scheduler; + unsigned long flags; + boolfound; + int ret; + uint32_tcount = 0; + + if (!ring) + return -EINVAL; + + dev_priv = ring-dev-dev_private; + scheduler = dev_priv-scheduler; + + if (!scheduler) + return 0; + + BUG_ON(is_locked (scheduler-flags[ring-id] i915_sf_submitting)); + + do { + found = false; + spin_lock_irqsave(scheduler-lock, flags); + list_for_each_entry(node, scheduler-node_queue[ring-id], link) { + if (!I915_SQS_IS_QUEUED(node)) + continue; + + found = true; + break; + } + spin_unlock_irqrestore(scheduler-lock, flags); + + if (found) { + ret = i915_scheduler_submit(ring, is_locked); + if (ret 0) + return ret; + + count += ret; + } + } while (found); + + return count; +} + static void i915_scheduler_priority_bump_clear(struct i915_scheduler *scheduler) { struct i915_scheduler_queue_entry *node; diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index fcf2640..5e094d5 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -92,6 +92,7 @@ voidi915_gem_scheduler_clean_node(struct i915_scheduler_queue_entry *nod int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe); int i915_scheduler_handle_irq(struct intel_engine_cs 
*ring); voidi915_gem_scheduler_work_handler(struct work_struct *work); +int i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked); int i915_scheduler_flush_request(struct drm_i915_gem_request *req, bool is_locked); booli915_scheduler_is_request_tracked(struct drm_i915_gem_request
[Intel-gfx] [RFC 04/39] drm/i915: Prelude to splitting i915_gem_do_execbuffer in two
From: John Harrison john.c.harri...@intel.com The scheduler decouples the submission of batch buffers to the driver with their submission to the hardware. This basically means splitting the execbuffer() function in half. This change rearranges some code ready for the split to occur. Change-Id: Icc9c8afaac18821f3eb8a151a49f918f90c068a3 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 57 ++ drivers/gpu/drm/i915/intel_lrc.c | 18 +++--- 2 files changed, 47 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d95d472..988ecd4 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -926,10 +926,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, if (flush_domains I915_GEM_DOMAIN_GTT) wmb(); - /* Unconditionally invalidate gpu caches and ensure that we do flush -* any residual writes from the previous batch. -*/ - return intel_ring_invalidate_all_caches(req); + return 0; } static bool @@ -1253,17 +1250,6 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, } } - ret = i915_gem_execbuffer_move_to_gpu(params-request, vmas); - if (ret) - goto error; - - ret = i915_switch_context(params-request); - if (ret) - goto error; - - WARN(params-ctx-ppgtt params-ctx-ppgtt-pd_dirty_rings (1ring-id), -%s didn't clear reload\n, ring-name); - instp_mode = args-flags I915_EXEC_CONSTANTS_MASK; instp_mask = I915_EXEC_CONSTANTS_MASK; switch (instp_mode) { @@ -1301,6 +1287,32 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, goto error; } + ret = i915_gem_execbuffer_move_to_gpu(params-request, vmas); + if (ret) + goto error; + + i915_gem_execbuffer_move_to_active(vmas, params-request); + + /* To be split into two functions here... 
*/ + + intel_runtime_pm_get(dev_priv); + + /* +* Unconditionally invalidate gpu caches and ensure that we do flush +* any residual writes from the previous batch. +*/ + ret = intel_ring_invalidate_all_caches(params-request); + if (ret) + goto error; + + /* Switch to the correct context for the batch */ + ret = i915_switch_context(params-request); + if (ret) + goto error; + + WARN(params-ctx-ppgtt params-ctx-ppgtt-pd_dirty_rings (1ring-id), +%s didn't clear reload\n, ring-name); + if (ring == dev_priv-ring[RCS] instp_mode != dev_priv-relative_constants_mode) { ret = intel_ring_begin(params-request, 4); @@ -1344,15 +1356,20 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, exec_start, exec_len, params-dispatch_flags); if (ret) - return ret; + goto error; } trace_i915_gem_ring_dispatch(params-request, params-dispatch_flags); - i915_gem_execbuffer_move_to_active(vmas, params-request); i915_gem_execbuffer_retire_commands(params); error: + /* +* intel_gpu_busy should also get a ref, so it will free when the device +* is really idle. +*/ + intel_runtime_pm_put(dev_priv); + kfree(cliprects); return ret; } @@ -1563,8 +1580,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } #endif - intel_runtime_pm_get(dev_priv); - ret = i915_mutex_lock_interruptible(dev); if (ret) goto pre_mutex_err; @@ -1759,10 +1774,6 @@ err: mutex_unlock(dev-struct_mutex); pre_mutex_err: - /* intel_gpu_busy should also get a ref, so it will free when the device -* is really idle. 
*/ - intel_runtime_pm_put(dev_priv); - if (fd_fence_complete != -1) { sys_close(fd_fence_complete); args-rsvd2 = (__u64) -1; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8aa9a18..89f3bcd 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -613,10 +613,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req, if (flush_domains I915_GEM_DOMAIN_GTT) wmb(); - /* Unconditionally invalidate gpu caches and ensure that we do flush -* any residual writes from the previous batch. -*/ - return logical_ring_invalidate_all_caches(req); + return 0; } int intel_logical_ring_alloc_request_extras(struct
[Intel-gfx] [RFC 19/39] drm/i915: Added scheduler support to __wait_request() calls
From: John Harrison john.c.harri...@intel.com The scheduler can cause batch buffers, and hence requests, to be submitted to the ring out of order and asynchronously to their submission to the driver. Thus at the point of waiting for the completion of a given request, it is not even guaranteed that the request has actually been sent to the hardware yet. Even if it has been sent, it is possible that it could be pre-empted and thus 'unsent'. This means that it is necessary to be able to submit requests to the hardware during the wait call itself. Unfortunately, while some callers of __wait_request() release the mutex lock first, others do not (and apparently can not). Hence there is the ability to deadlock as the wait stalls for submission but the asynchronous submission is stalled for the mutex lock. This change hooks the scheduler in to the __wait_request() code to ensure correct behaviour. That is, flush the target batch buffer through to the hardware and do not deadlock waiting for something that cannot currently be submitted. Instead, the wait call must return EAGAIN at least as far back as necessary to release the mutex lock and allow the scheduler's asynchronous processing to get in and handle the pre-emption operation and eventually (re-)submit the work.
Change-Id: I31fe6bc7e38f6ffdd843fcae16e7cc8b1e52a931 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem.c | 37 +++--- drivers/gpu/drm/i915/i915_scheduler.c | 91 + drivers/gpu/drm/i915/i915_scheduler.h | 2 + drivers/gpu/drm/i915/intel_display.c| 3 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 6 files changed, 128 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2b3fab6..e9e0736 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2972,7 +2972,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req, unsigned reset_counter, bool interruptible, s64 *timeout, - struct intel_rps_client *rps); + struct intel_rps_client *rps, + bool is_locked); int __must_check i915_wait_request(struct drm_i915_gem_request *req); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); int __must_check diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cb5af5d..f713cda 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1219,7 +1219,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req, unsigned reset_counter, bool interruptible, s64 *timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps, + bool is_locked) { struct intel_engine_cs *ring = i915_gem_request_get_ring(req); struct drm_device *dev = ring-dev; @@ -1229,8 +1230,10 @@ int __i915_wait_request(struct drm_i915_gem_request *req, DEFINE_WAIT(wait); unsigned long timeout_expire; s64 before, now; - int ret; + int ret = 0; + boolbusy; + might_sleep(); WARN(!intel_irqs_enabled(dev_priv), IRQs disabled); if (list_empty(req-list)) @@ -1281,6 +1284,22 @@ int __i915_wait_request(struct drm_i915_gem_request *req, break; } + if (is_locked) { + /* If this request is being processed by the scheduler +* then it is unsafe to sleep with the mutex 
lock held +* as the scheduler may require the lock in order to +* progress the request. */ + if (i915_scheduler_is_request_tracked(req, NULL, busy)) { + if (busy) { + ret = -EAGAIN; + break; + } + } + + /* If the request is not tracked by the scheduler then the +* regular test can be done. */ + } + if (i915_gem_request_completed(req)) { ret = 0; break; @@ -1452,13 +1471,17 @@ i915_wait_request(struct drm_i915_gem_request *req) BUG_ON(!mutex_is_locked(dev-struct_mutex)); + ret = i915_scheduler_flush_request(req, true); + if (ret 0) + return ret; + ret = i915_gem_check_wedge(dev_priv-gpu_error, interruptible); if (ret) return ret; ret = __i915_wait_request(req,
[Intel-gfx] [RFC 23/39] drm/i915: Added a module parameter for allowing scheduler overrides
From: John Harrison john.c.harri...@intel.com It can be useful to be able to disable certain features (e.g. the entire scheduler) via a module parameter for debugging purposes. A parameter has the advantage of not being a compile time switch but without implying that it can be changed dynamically at runtime. Change-Id: I92f4c832be88f5b34b49b90d6a9903fac68f7004 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_params.c| 4 drivers/gpu/drm/i915/i915_scheduler.c | 7 +-- drivers/gpu/drm/i915/i915_scheduler.h | 5 + 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e9e0736..30552cc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2664,6 +2664,7 @@ struct i915_params { bool verbose_state_checks; bool nuclear_pageflip; int edp_vswing; + int scheduler_override; }; extern struct i915_params i915 __read_mostly; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 7983fe4..a5320ff 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -53,6 +53,7 @@ struct i915_params i915 __read_mostly = { .verbose_state_checks = 1, .nuclear_pageflip = 0, .edp_vswing = 0, + .scheduler_override = 1, }; module_param_named(modeset, i915.modeset, int, 0400); @@ -186,3 +187,6 @@ MODULE_PARM_DESC(edp_vswing, Ignore/Override vswing pre-emph table selection from VBT (0=use value from vbt [default], 1=low power swing(200mV), 2=default swing(400mV))); + +module_param_named(scheduler_override, i915.scheduler_override, int, 0600); +MODULE_PARM_DESC(scheduler_override, Scheduler override mask (0 = none, 1 = direct submission [default])); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 3155f42..224c8b4 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ 
b/drivers/gpu/drm/i915/i915_scheduler.c @@ -44,6 +44,9 @@ bool i915_scheduler_is_enabled(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev-dev_private; + if (i915.scheduler_override i915_so_direct_submit) + return false; + return dev_priv-scheduler != NULL; } @@ -92,7 +95,7 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) BUG_ON(!scheduler); - if (1/*i915.scheduler_override i915_so_direct_submit*/) { + if (i915.scheduler_override i915_so_direct_submit) { int ret; qe-scheduler_index = scheduler-index++; @@ -466,7 +469,7 @@ int i915_scheduler_handle_irq(struct intel_engine_cs *ring) seqno = ring-get_seqno(ring, false); - if (1/*i915.scheduler_override i915_so_direct_submit*/) + if (i915.scheduler_override i915_so_direct_submit) return 0; if (seqno == scheduler-last_irq_seqno[ring-id]) { diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index b440e62..7d743c9 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -89,6 +89,11 @@ enum { i915_sf_submitting = (1 1), }; +/* Options for 'scheduler_override' module parameter: */ +enum { + i915_so_direct_submit = (1 0), +}; + booli915_scheduler_is_enabled(struct drm_device *dev); int i915_scheduler_init(struct drm_device *dev); int i915_scheduler_closefile(struct drm_device *dev, -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 01/39] drm/i915: Add total count to context status debugfs output
From: John Harrison john.c.harri...@intel.com When there are lots and lots and even more lots of contexts (e.g. when running with execlists) it is useful to be able to immediately see what the total context count is. Change-Id: If9726d4df86567100ecf53867b43f4753f08bf84 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index c50a798..05646fe 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1942,6 +1942,7 @@ static int i915_context_status(struct seq_file *m, void *unused) struct drm_i915_private *dev_priv = dev-dev_private; struct intel_engine_cs *ring; struct intel_context *ctx; + uint32_t count = 0; int ret, i; ret = mutex_lock_interruptible(dev-struct_mutex); @@ -1955,6 +1956,7 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_puts(m, HW context ); describe_ctx(m, ctx); + count++; for_each_ring(ring, dev_priv, i) { if (ring-default_context == ctx) seq_printf(m, (default context %s) , @@ -1983,6 +1985,8 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_putc(m, '\n'); } + seq_printf(m, Total: %d contexts\n, count); + mutex_unlock(dev-struct_mutex); return 0; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 37/39] drm/i915: GPU priority bumping to prevent starvation
From: John Harrison john.c.harri...@intel.com If a high priority task was to continuously submit batch buffers to the driver, it could starve out any lower priority task from getting any GPU time at all. To prevent this, the priority of a queued batch buffer is bumped each time it does not get submitted to the hardware. Change-Id: I0319c7d2f306c61a283f03edda9b5d09a6d3b621 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 28 drivers/gpu/drm/i915/i915_scheduler.c | 14 ++ drivers/gpu/drm/i915/i915_scheduler.h | 1 + 3 files changed, 43 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3c5c750..509668f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1152,6 +1152,33 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_scheduler_priority_max_fops, 0x%llx\n); static int +i915_scheduler_priority_bump_get(void *data, u64 *val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev-dev_private; + struct i915_scheduler *scheduler = dev_priv-scheduler; + + *val = (u64) scheduler-priority_level_bump; + return 0; +} + +static int +i915_scheduler_priority_bump_set(void *data, u64 val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev-dev_private; + struct i915_scheduler *scheduler = dev_priv-scheduler; + + scheduler-priority_level_bump = (u32) val; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_scheduler_priority_bump_fops, + i915_scheduler_priority_bump_get, + i915_scheduler_priority_bump_set, + 0x%llx\n); + +static int i915_scheduler_priority_preempt_get(void *data, u64 *val) { struct drm_device *dev = data; @@ -5349,6 +5376,7 @@ static const struct i915_debugfs_files { {i915_error_state, i915_error_state_fops}, {i915_next_seqno, i915_next_seqno_fops}, {i915_scheduler_priority_max, i915_scheduler_priority_max_fops}, + {i915_scheduler_priority_bump, i915_scheduler_priority_bump_fops}, 
{i915_scheduler_priority_preempt, i915_scheduler_priority_preempt_fops}, {i915_scheduler_min_flying, i915_scheduler_min_flying_fops}, {i915_scheduler_file_queue_max, i915_scheduler_file_queue_max_fops}, diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 631f4e6..8de3f0b 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -191,6 +191,7 @@ int i915_scheduler_init(struct drm_device *dev) /* Default tuning values: */ scheduler-priority_level_max = ~0U; + scheduler-priority_level_bump= 50; scheduler-priority_level_preempt = 900; scheduler-min_flying = 2; scheduler-file_queue_max = 64; @@ -1568,6 +1569,19 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked) ret = i915_scheduler_pop_from_queue_locked(ring, node, flags); } while (ret == 0); + /* +* Bump the priority of everything that was not submitted to prevent +* starvation of low priority tasks by a spamming high priority task. +*/ + i915_scheduler_priority_bump_clear(scheduler); + list_for_each_entry(node, scheduler-node_queue[ring-id], link) { + if (!I915_SQS_IS_QUEUED(node)) + continue; + + i915_scheduler_priority_bump(scheduler, node, +scheduler-priority_level_bump); + } + spin_unlock_irqrestore(scheduler-lock, flags); if (!was_locked) diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 2113e7d..8f3e42f 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -119,6 +119,7 @@ struct i915_scheduler { /* Tuning parameters: */ uint32_tpriority_level_max; + uint32_tpriority_level_bump; uint32_tpriority_level_preempt; uint32_tmin_flying; uint32_tfile_queue_max; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 30/39] drm/i915: Added scheduler queue throttling by DRM file handle
From: John Harrison john.c.harri...@intel.com The scheduler decouples the submission of batch buffers to the driver from their subsequent submission to the hardware. This means that an application which is continuously submitting buffers as fast as it can could potentially flood the driver. To prevent this, the driver now tracks how many buffers are in progress (queued in software or executing in hardware) and limits this to a given (tunable) number. If this number is exceeded then the queue to the driver will return EAGAIN and thus prevent the scheduler's queue becoming arbitrarily large. Change-Id: I83258240aec7c810db08c006a3062d46aa91363f For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.h| 2 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 +++ drivers/gpu/drm/i915/i915_scheduler.c | 34 ++ drivers/gpu/drm/i915/i915_scheduler.h | 2 ++ 4 files changed, 46 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b568432..e230632 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -334,6 +334,8 @@ struct drm_i915_file_private { } rps; struct intel_engine_cs *bsd_ring; + + u32 scheduler_queue_length; }; enum intel_dpll_id { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f90a2c8..c2a69d8 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1935,6 +1935,10 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + /* Throttle batch requests per device file */ + if (i915_scheduler_file_queue_is_full(file)) + return -EAGAIN; + /* Copy in the exec list from userland */ exec_list = drm_malloc_ab(sizeof(*exec_list), args-buffer_count); exec2_list = drm_malloc_ab(sizeof(*exec2_list), args-buffer_count); @@ -2018,6 +2022,10 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EINVAL; } + /* Throttle
batch requests per device file */ + if (i915_scheduler_file_queue_is_full(file)) + return -EAGAIN; + exec2_list = kmalloc(sizeof(*exec2_list)*args-buffer_count, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); if (exec2_list == NULL) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 408bedc..f0c99ad 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -40,6 +40,8 @@ static voidi915_scheduler_priority_bump_clear(struct i915_scheduler *sch static int i915_scheduler_priority_bump(struct i915_scheduler *scheduler, struct i915_scheduler_queue_entry *target, uint32_t bump); +static voidi915_scheduler_file_queue_inc(struct drm_file *file); +static voidi915_scheduler_file_queue_dec(struct drm_file *file); bool i915_scheduler_is_enabled(struct drm_device *dev) { @@ -75,6 +77,7 @@ int i915_scheduler_init(struct drm_device *dev) scheduler-priority_level_max = ~0U; scheduler-priority_level_preempt = 900; scheduler-min_flying = 2; + scheduler-file_queue_max = 64; dev_priv-scheduler = scheduler; @@ -249,6 +252,8 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) list_add_tail(node-link, scheduler-node_queue[ring-id]); + i915_scheduler_file_queue_inc(node-params.file); + if (i915.scheduler_override i915_so_submit_on_queue) not_flying = true; else @@ -630,6 +635,12 @@ static int i915_scheduler_remove(struct intel_engine_cs *ring) /* Strip the dependency info while the mutex is still locked */ i915_scheduler_remove_dependent(scheduler, node); + /* Likewise clean up the file descriptor before it might disappear. 
*/ + if (node-params.file) { + i915_scheduler_file_queue_dec(node-params.file); + node-params.file = NULL; + } + continue; } @@ -1330,3 +1341,26 @@ int i915_scheduler_closefile(struct drm_device *dev, struct drm_file *file) return 0; } + +bool i915_scheduler_file_queue_is_full(struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file-driver_priv; + struct drm_i915_private *dev_priv = file_priv-dev_priv; + struct i915_scheduler*scheduler = dev_priv-scheduler; + + return file_priv-scheduler_queue_length = scheduler-file_queue_max; +} + +static void i915_scheduler_file_queue_inc(struct drm_file *file) +{ + struct
[Intel-gfx] [RFC 29/39] drm/i915: Added trace points to scheduler
From: John Harrison john.c.harri...@intel.com Added trace points to the scheduler to track all the various events, node state transitions and other interesting things that occur. Change-Id: I9886390cfc7897bc1faf50a104bc651d8baed8a5 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 + drivers/gpu/drm/i915/i915_scheduler.c | 34 - drivers/gpu/drm/i915/i915_trace.h | 208 + drivers/gpu/drm/i915/intel_lrc.c | 2 + 4 files changed, 244 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1325b19..f90a2c8 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1291,6 +1291,8 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, i915_gem_execbuffer_move_to_active(vmas, params-request); + trace_i915_gem_ring_queue(ring, params); + qe = container_of(params, typeof(*qe), params); ret = i915_scheduler_queue_execbuffer(qe); if (ret) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 66dbc20..408bedc 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -101,6 +101,8 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) qe-scheduler_index = scheduler-index++; + trace_i915_scheduler_queue(qe-params.ring, qe); + WARN_ON(qe-params.fence_wait (atomic_read(qe-params.fence_wait-status) == 0)); @@ -253,6 +255,9 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) not_flying = i915_scheduler_count_flying(scheduler, ring) scheduler-min_flying; + trace_i915_scheduler_queue(ring, node); + trace_i915_scheduler_node_state_change(ring, node); + spin_unlock_irqrestore(scheduler-lock, flags); if (not_flying) @@ -280,6 +285,9 @@ static int i915_scheduler_fly_node(struct i915_scheduler_queue_entry *node) node-status = i915_sqs_flying; + 
trace_i915_scheduler_fly(ring, node); + trace_i915_scheduler_node_state_change(ring, node); + if (!(scheduler-flags[ring-id] i915_sf_interrupts_enabled)) { boolsuccess = true; @@ -344,6 +352,8 @@ static void i915_scheduler_node_requeue(struct i915_scheduler_queue_entry *node) BUG_ON(!I915_SQS_IS_FLYING(node)); node-status = i915_sqs_queued; + trace_i915_scheduler_unfly(node-params.ring, node); + trace_i915_scheduler_node_state_change(node-params.ring, node); } /* Give up on a popped node completely. For example, because it is causing the @@ -354,6 +364,8 @@ static void i915_scheduler_node_kill(struct i915_scheduler_queue_entry *node) BUG_ON(!I915_SQS_IS_FLYING(node)); node-status = i915_sqs_dead; + trace_i915_scheduler_unfly(node-params.ring, node); + trace_i915_scheduler_node_state_change(node-params.ring, node); } /* Abandon a queued node completely. For example because the driver is being @@ -365,6 +377,7 @@ static void i915_scheduler_node_kill_queued(struct i915_scheduler_queue_entry *n BUG_ON(!I915_SQS_IS_QUEUED(node)); node-status = i915_sqs_dead; + trace_i915_scheduler_node_state_change(node-params.ring, node); } /* The system is toast. Terminate all nodes with extreme prejudice. */ @@ -429,8 +442,10 @@ static void i915_scheduler_seqno_complete(struct intel_engine_cs *ring, uint32_t * if a completed entry is found then there is no need to scan further. */ list_for_each_entry(node, scheduler-node_queue[ring-id], link) { - if (I915_SQS_IS_COMPLETE(node)) + if (I915_SQS_IS_COMPLETE(node)) { + trace_i915_scheduler_landing(ring, seqno, node); return; + } if (seqno == node-params.request-seqno) break; @@ -441,8 +456,12 @@ static void i915_scheduler_seqno_complete(struct intel_engine_cs *ring, uint32_t * like cache flushes and page flips. So don't complain about if * no node was found. 
*/ - if (node-link == scheduler-node_queue[ring-id]) + if (node-link == scheduler-node_queue[ring-id]) { + trace_i915_scheduler_landing(ring, seqno, NULL); return; + } + + trace_i915_scheduler_landing(ring, seqno, node); WARN_ON(!I915_SQS_IS_FLYING(node)); @@ -457,6 +476,7 @@ static void i915_scheduler_seqno_complete(struct intel_engine_cs *ring, uint32_t /* Node was in flight so mark it as complete. */ node-status = i915_sqs_complete; +
[Intel-gfx] [RFC 25/39] drm/i915: Defer seqno allocation until actual hardware submission time
From: John Harrison john.c.harri...@intel.com The seqno value is now only used for the final test for completion of a request. It is no longer used to track the request through the software stack. Thus it is no longer necessary to allocate the seqno immediately with the request. Instead, it can be done lazily and left until the request is actually sent to the hardware. This is particularly advantageous with a GPU scheduler as the requests can then be re-ordered between their creation and their hardware submission without having out of order seqnos. v2: i915_add_request() can't fail! v3: combine with 'drm/i915: Assign seqno at start of exec_final()' Various bits of code during the execbuf code path need a seqno value to be assigned to the request. This change makes this assignment explicit at the start of submission_final() rather than relying on an auto-generated seqno to have happened already. This is in preparation for a future patch which changes seqno values to be assigned lazily (during add_request). Change-Id: I0d922b84c517611a79fa6c2b9e730d4fe3671d6a For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.h| 1 + drivers/gpu/drm/i915/i915_gem.c| 21 - drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 + drivers/gpu/drm/i915/intel_lrc.c | 13 + 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 30552cc..12b4986 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2201,6 +2201,7 @@ struct drm_i915_gem_request { /** GEM sequence number associated with this request.
*/ uint32_t seqno; + uint32_t reserved_seqno; /* Unique identifier which can be used for trace points debug */ uint32_t uniq; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 20c696f..7308838 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2524,6 +2524,9 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) /* reserve 0 for non-seqno */ if (dev_priv-next_seqno == 0) { + /* Why is the full re-initialisation required? Is it only for +* hardware semaphores? If so, could skip it in the case where +* semaphores are disabled? */ int ret = i915_gem_init_seqno(dev, 0); if (ret) return ret; @@ -2581,6 +2584,12 @@ void __i915_add_request(struct drm_i915_gem_request *request, WARN(ret, *_ring_flush_all_caches failed: %d!\n, ret); } + /* Make the request's seqno 'live': */ + if(!request-seqno) { + request-seqno = request-reserved_seqno; + WARN_ON(request-seqno != dev_priv-last_seqno); + } + /* Record the position of the start of the request so that * should we detect the updated seqno part-way through the * GPU processing the request, we never over-estimate the @@ -2821,6 +2830,9 @@ void i915_gem_request_notify(struct intel_engine_cs *ring) if (!complete) continue; } else { + /* How can this happen? */ + WARN_ON(req-seqno == 0); + if (!i915_seqno_passed(seqno, req-seqno)) continue; } @@ -3009,7 +3021,14 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, if (req == NULL) return -ENOMEM; - ret = i915_gem_get_seqno(ring-dev, req-seqno); + /* +* Assign an identifier to track this request through the hardware +* but don't make it live yet. It could change in the future if this +* request gets overtaken. However, it still needs to be allocated +* in advance because the point of submission must not fail and seqno +* allocation can fail. 
+*/ + ret = i915_gem_get_seqno(ring-dev, req-reserved_seqno); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 61a5498..1642701 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1317,6 +1317,19 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) /* The mutex must be acquired before calling this function */ BUG_ON(!mutex_is_locked(params-dev-struct_mutex)); + /* Make sure the request's seqno is the latest and greatest: */ + if(params-request-reserved_seqno != dev_priv-last_seqno) { + ret = i915_gem_get_seqno(ring-dev, params-request-reserved_seqno); + if
[Intel-gfx] [RFC 16/39] drm/i915: Added tracking/locking of batch buffer objects
From: John Harrison john.c.harri...@intel.com The scheduler needs to track interdependencies between batch buffers. These are calculated by analysing the object lists of the buffers and looking for commonality. The scheduler also needs to keep those buffers locked long after the initial IOCTL call has returned to user land. Change-Id: I31e3677ecfc2c9b5a908bda6acc4850432d55f1e For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 48 -- drivers/gpu/drm/i915/i915_scheduler.c | 33 ++-- 2 files changed, 76 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 75d018d..61a5498 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1498,7 +1498,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_execbuffer_params *params = qe.params; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 dispatch_flags; - int ret; + int ret, i; bool need_relocs; int fd_fence_complete = -1; #ifdef CONFIG_SYNC @@ -1636,6 +1636,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } + qe.saved_objects = kzalloc( + sizeof(*qe.saved_objects) * args-buffer_count, + GFP_KERNEL); + if (!qe.saved_objects) { + ret = -ENOMEM; + goto err; + } + /* Look up object handles */ ret = eb_lookup_vmas(eb, exec, args, vm, file); if (ret) @@ -1756,7 +1764,26 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params-args_DR1= args-DR1; params-args_DR4= args-DR4; params-batch_obj = batch_obj; - params-ctx = ctx; + + /* +* Save away the list of objects used by this batch buffer for the +* purpose of tracking inter-buffer dependencies. +*/ + for (i = 0; i args-buffer_count; i++) { + /* +* NB: 'drm_gem_object_lookup()' increments the object's +* reference count and so must be matched by a +* 'drm_gem_object_unreference' call. 
+*/ + qe.saved_objects[i].obj = + to_intel_bo(drm_gem_object_lookup(dev, file, + exec[i].handle)); + } + qe.num_objs = i; + + /* Lock and save the context object as well. */ + i915_gem_context_reference(ctx); + params-ctx = ctx; #ifdef CONFIG_SYNC if (args-flags I915_EXEC_CREATE_FENCE) { @@ -1808,6 +1835,23 @@ err: i915_gem_context_unreference(ctx); eb_destroy(eb); + if (qe.saved_objects) { + /* Need to release the objects: */ + for (i = 0; i qe.num_objs; i++) { + if (!qe.saved_objects[i].obj) + continue; + + drm_gem_object_unreference( + qe.saved_objects[i].obj-base); + } + + kfree(qe.saved_objects); + + /* Context too */ + if (params-ctx) + i915_gem_context_unreference(params-ctx); + } + /* * If the request was created but not successfully submitted then it * must be freed again. If it was submitted then it is being tracked diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index e145829..f5fa968 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -108,7 +108,23 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) if (ret) return ret; - /* Free everything that is owned by the QE structure: */ + /* Need to release the objects: */ + for (i = 0; i qe-num_objs; i++) { + if (!qe-saved_objects[i].obj) + continue; + + drm_gem_object_unreference(qe-saved_objects[i].obj-base); + } + + kfree(qe-saved_objects); + qe-saved_objects = NULL; + qe-num_objs = 0; + + /* Free the context object too: */ + if (qe-params.ctx) + i915_gem_context_unreference(qe-params.ctx); + + /* And anything else owned by the QE structure: */ kfree(qe-params.cliprects); if (qe-params.dispatch_flags I915_DISPATCH_SECURE) i915_gem_execbuff_release_batch_obj(qe-params.batch_obj); @@ -425,7 +441,7 @@ static
[Intel-gfx] [RFC 09/39] drm/i915: Added scheduler hook into i915_gem_complete_requests_ring()
From: John Harrison john.c.harri...@intel.com The GPU scheduler can cause requests to complete out of order. For example, because one request pre-empted others that had already been submitted. This means the simple seqno comparison is not necessarily valid. Instead, a check against what the scheduler is currently doing must be made to determine if a request has really completed. Change-Id: I149250a8f9382586514ca324aba1c53063b83e19 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 13 +++-- drivers/gpu/drm/i915/i915_scheduler.c | 31 +++ drivers/gpu/drm/i915/i915_scheduler.h | 2 ++ 4 files changed, 46 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7d2a494..58f53ec 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2238,6 +2238,8 @@ struct drm_i915_gem_request { /** process identifier submitting this request */ struct pid *pid; + struct i915_scheduler_queue_entry *scheduler_qe; + /** * The ELSP only accepts two elements at a time, so we queue * context/tail pairs on a given queue (ring-execlist_queue) until the diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 56405cd..e3c4032 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2772,6 +2772,7 @@ void i915_gem_request_notify(struct intel_engine_cs *ring) { struct drm_i915_gem_request *req, *req_next; unsigned long flags; + bool complete; u32 seqno; LIST_HEAD(free_list); @@ -2785,8 +2786,13 @@ void i915_gem_request_notify(struct intel_engine_cs *ring) spin_lock_irqsave(ring-fence_lock, flags); list_for_each_entry_safe(req, req_next, ring-fence_signal_list, signal_list) { if (!req-cancelled) { - if (!i915_seqno_passed(seqno, req-seqno)) - continue; + if (i915_scheduler_is_request_tracked(req, complete, NULL)) { + if (!complete) + continue; + } else { + if 
(!i915_seqno_passed(seqno, req-seqno)) + continue; + } fence_signal_locked(req-fence); trace_i915_gem_request_complete(req); @@ -2811,6 +2817,9 @@ void i915_gem_request_notify(struct intel_engine_cs *ring) i915_gem_request_unreference(req); } + + /* Necessary? Or does the fence_signal() call do an implicit wakeup? */ + wake_up_all(ring-irq_queue); } static void i915_fence_timeline_value_str(struct fence *fence, char *str, int size) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 71d8df7..0d1cbe3 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -119,6 +119,9 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) node-stamp = stamp; i915_gem_request_reference(node-params.request); + BUG_ON(node-params.request-scheduler_qe); + node-params.request-scheduler_qe = node; + /* Need to determine the number of incomplete entries in the list as * that will be the maximum size of the dependency list. * @@ -363,6 +366,13 @@ static void i915_scheduler_seqno_complete(struct intel_engine_cs *ring, uint32_t got_changes = true; } + /* +* Avoid issues with requests not being signalled because their +* interrupt has already passed. +*/ + if (got_changes) + i915_gem_request_notify(ring); + /* Should submit new work here if flight list is empty but the DRM * mutex lock might not be available if a '__wait_request()' call is * blocking the system. 
*/ @@ -504,6 +514,7 @@ int i915_scheduler_remove(struct intel_engine_cs *ring) i915_gem_execbuff_release_batch_obj(node-params.batch_obj); /* Free everything that is owned by the node: */ + node-params.request-scheduler_qe = NULL; i915_gem_request_unreference(node-params.request); kfree(node-params.cliprects); kfree(node-dep_list); @@ -774,3 +785,23 @@ static int i915_scheduler_remove_dependent(struct i915_scheduler *scheduler, return 0; } + +bool i915_scheduler_is_request_tracked(struct drm_i915_gem_request *req, + bool *completed, bool *busy) +{ + struct drm_i915_private *dev_priv = req-ring-dev-dev_private; + struct i915_scheduler *scheduler =
[Intel-gfx] [RFC 18/39] drm/i915: Added scheduler interrupt handler hook
From: John Harrison john.c.harri...@intel.com The scheduler needs to be informed of each batch buffer completion. This is done via the user interrupt mechanism. The epilogue of each batch buffer submission updates a sequence number value (seqno) and triggers a user interrupt. This change hooks the scheduler in to the processing of that interrupt via the notify_ring() function. The scheduler also has clean up code that needs to be done outside of the interrupt context, thus notify_ring() now also pokes the scheduler's work queue. Change-Id: I4724b3ad7782453a244f84744d54bf14f5b65a38 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f67d09b..40a2eff 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -36,6 +36,7 @@ #include i915_drv.h #include i915_trace.h #include intel_drv.h +#include i915_scheduler.h /** * DOC: interrupt handling @@ -851,6 +852,8 @@ static void notify_ring(struct intel_engine_cs *ring) if (!intel_ring_initialized(ring)) return; + i915_scheduler_handle_irq(ring); + i915_gem_request_notify(ring); wake_up_all(ring-irq_queue); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 35/39] drm/i915: Added seqno values to scheduler status dump
From: John Harrison john.c.harri...@intel.com It is useful to be able to see what seqnos have actually popped out of the hardware when viewing the scheduler status. Change-Id: Ie93e51c64328be2606b8b43440f6344d5f225426 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_scheduler.c | 10 ++ drivers/gpu/drm/i915/i915_scheduler.h | 1 + 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 1547b64..7be1c89 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -152,6 +152,7 @@ const char *i915_scheduler_flag_str(uint32_t flags) TEST_FLAG(i915_sf_dump_force, DumpForce|); TEST_FLAG(i915_sf_dump_details, DumpDetails|); TEST_FLAG(i915_sf_dump_dependencies, DumpDeps|); + TEST_FLAG(i915_sf_dump_seqno, DumpSeqno|); #undef TEST_FLAG @@ -861,6 +862,7 @@ static int i915_scheduler_dump_all_locked(struct drm_device *dev, const char *ms for_each_ring(ring, dev_priv, i) { scheduler-flags[ring-id] |= i915_sf_dump_force | i915_sf_dump_details | + i915_sf_dump_seqno | i915_sf_dump_dependencies; r = i915_scheduler_dump_locked(ring, msg); if (ret == 0) @@ -942,6 +944,14 @@ static int i915_scheduler_dump_locked(struct intel_engine_cs *ring, const char * return 0; } + if (scheduler-flags[ring-id] i915_sf_dump_seqno) { + uint32_tseqno; + + seqno= ring-get_seqno(ring, true); + + DRM_DEBUG_DRIVER(%s Seqno = %d\n, ring-name, seqno); + } + if (scheduler-flags[ring-id] i915_sf_dump_details) { int i, deps; uint32_tcount, counts[i915_sqs_MAX]; diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index dd0510c..6e6e3a0 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -137,6 +137,7 @@ enum { i915_sf_dump_force = (1 8), i915_sf_dump_details= (1 9), i915_sf_dump_dependencies = (1 10), + i915_sf_dump_seqno = (1 11), }; const char 
*i915_scheduler_flag_str(uint32_t flags); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 14/39] drm/i915: Redirect execbuffer_final() via scheduler
From: John Harrison john.c.harri...@intel.com Updated the execbuffer() code to pass the packaged up batch buffer information to the scheduler rather than calling execbuffer_final() directly. The scheduler queue() code is currently a stub which simply chains on to _final() immediately. Change-Id: I2a19062a9e66845f2e886332fc4b5fc7ac992864 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 +++ drivers/gpu/drm/i915/intel_lrc.c | 12 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index ba9d595..364e9cc 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -37,6 +37,7 @@ #ifdef CONFIG_SYNC #include ../drivers/staging/android/sync.h #endif +#include i915_scheduler.h #define __EXEC_OBJECT_HAS_PIN (131) #define __EXEC_OBJECT_HAS_FENCE (130) @@ -1198,6 +1199,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas) { + struct i915_scheduler_queue_entry *qe; struct drm_device *dev = params-dev; struct intel_engine_cs *ring = params-ring; struct drm_i915_private *dev_priv = dev-dev_private; @@ -1289,18 +1291,11 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, i915_gem_execbuffer_move_to_active(vmas, params-request); - ret = dev_priv-gt.execbuf_final(params); + qe = container_of(params, typeof(*qe), params); + ret = i915_scheduler_queue_execbuffer(qe); if (ret) goto error; - /* -* Free everything that was stored in the QE structure (until the -* scheduler arrives and does it instead): -*/ - kfree(params-cliprects); - if (params-dispatch_flags I915_DISPATCH_SECURE) - i915_gem_execbuff_release_batch_obj(params-batch_obj); - return 0; error: @@ -1492,8 +1487,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct intel_engine_cs *ring; 
struct intel_context *ctx; struct i915_address_space *vm; - struct i915_execbuffer_params params_master; /* XXX: will be removed later */ - struct i915_execbuffer_params *params = params_master; + struct i915_scheduler_queue_entry qe; + struct i915_execbuffer_params *params = qe.params; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 dispatch_flags; int ret; @@ -1624,7 +1619,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, else vm = dev_priv-gtt.base; - memset(params_master, 0x00, sizeof(params_master)); + memset(qe, 0x00, sizeof(qe)); eb = eb_create(args); if (eb == NULL) { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index bba1152..a8c78ec 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -136,6 +136,7 @@ #include drm/i915_drm.h #include i915_drv.h #include intel_mocs.h +#include i915_scheduler.h #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) @@ -827,6 +828,7 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas) { + struct i915_scheduler_queue_entry *qe; struct drm_device *dev = params-dev; struct intel_engine_cs *ring = params-ring; struct drm_i915_private *dev_priv = dev-dev_private; @@ -884,17 +886,11 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, i915_gem_execbuffer_move_to_active(vmas, params-request); - ret = dev_priv-gt.execbuf_final(params); + qe = container_of(params, typeof(*qe), params); + ret = i915_scheduler_queue_execbuffer(qe); if (ret) return ret; - /* -* Free everything that was stored in the QE structure (until the -* scheduler arrives and does it instead): -*/ - if (params-dispatch_flags I915_DISPATCH_SECURE) - i915_gem_execbuff_release_batch_obj(params-batch_obj); - return 0; } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 38/39] drm/i915: Enable GPU scheduler by default
From: John Harrison john.c.harri...@intel.com Now that all the scheduler patches have been applied, it is safe to enable. Change-Id: I128042e85a30fca765ce1eb46c837c62dee66089 For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_params.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index a5320ff..4656518 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -53,7 +53,7 @@ struct i915_params i915 __read_mostly = { .verbose_state_checks = 1, .nuclear_pageflip = 0, .edp_vswing = 0, - .scheduler_override = 1, + .scheduler_override = 0, }; module_param_named(modeset, i915.modeset, int, 0400); @@ -189,4 +189,4 @@ MODULE_PARM_DESC(edp_vswing, 2=default swing(400mV))); module_param_named(scheduler_override, i915.scheduler_override, int, 0600); -MODULE_PARM_DESC(scheduler_override, Scheduler override mask (0 = none, 1 = direct submission [default])); +MODULE_PARM_DESC(scheduler_override, Scheduler override mask (default: 0)); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 39/39] drm/i915: Allow scheduler to manage inter-ring object synchronisation
From: John Harrison john.c.harri...@intel.com The scheduler has always tracked batch buffer dependencies based on DRM object usage. This means that it will not submit a batch on one ring that has outstanding dependencies still executing on other rings. This is exactly the same synchronisation performed by i915_gem_object_sync() using hardware semaphores where available and CPU stalls where not (e.g. in execlist mode and/or on Gen8 hardware). Unfortunately, when a batch buffer is submitted to the driver the _object_sync() call happens first. Thus in case where hardware semaphores are disabled, the driver has already stalled until the dependency has been resolved. This patch adds an optimisation to _object_sync() to ignore the synchronisation in the case where it will subsequently be handled by the scheduler. This removes the driver stall and (in the single application case) provides near hardware semaphore performance even when hardware semaphores are disabled. In a busy system where there is other work that can be executed on the stalling ring, it provides better than hardware semaphore performance as it removes the stall from both the driver and from the hardware. There is also a theory that this method should improve power usage as hardware semaphores are apparently not very power efficient - the stalled ring does not go into as low a power a state as when it is genuinely idle. The optimisation is to check whether both ends of the synchronisation are batch buffer requests. If they are, then the scheduler will have the inter-dependency tracked and managed. If one or other end is not a batch buffer request (e.g. a page flip) then the code falls back to the CPU stall or hardware semaphore as appropriate. To check whether the existing usage is a batch buffer, the code simply calls the 'are you tracking this request' function of the scheduler on the object's last_read_req member. To check whether the new usage is a batch buffer, a flag is passed in from the caller. 
Change-Id: Idc16e19b5a4dc8b3782ce9db44dd3df445f396c1 Issue: VIZ-5566 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_gem.c| 19 +++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 2 +- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e230632..e4bef2c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2895,7 +2895,7 @@ int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); #endif int i915_gem_object_sync(struct drm_i915_gem_object *obj, struct intel_engine_cs *to, -struct drm_i915_gem_request **to_req); +struct drm_i915_gem_request **to_req, bool to_batch); void i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_request *req); int i915_gem_dumb_create(struct drm_file *file_priv, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7308838..e0dca8c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3507,7 +3507,7 @@ static int __i915_gem_object_sync(struct drm_i915_gem_object *obj, struct intel_engine_cs *to, struct drm_i915_gem_request *from_req, - struct drm_i915_gem_request **to_req) + struct drm_i915_gem_request **to_req, bool to_batch) { struct intel_engine_cs *from; int ret; @@ -3519,6 +3519,15 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, if (i915_gem_request_completed(from_req)) return 0; + /* +* The scheduler will manage inter-ring object dependencies +* as long as both to and from requests are scheduler managed +* (i.e. batch buffers). 
+*/ + if (to_batch + i915_scheduler_is_request_tracked(from_req, NULL, NULL)) + return 0; + if (!i915_semaphore_is_enabled(obj-base.dev)) { struct drm_i915_private *i915 = to_i915(obj-base.dev); ret = __i915_wait_request(from_req, @@ -3569,6 +3578,8 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * @to_req: request we wish to use the object for. See below. * This will be allocated and returned if a request is * required but not passed in. + * @to_batch: is the sync request on behalf of batch buffer submission? + * If so then the scheduler can (potentially) manage the synchronisation. * * This code is meant to abstract object synchronization with the GPU. * Calling with NULL implies synchronizing the object with the CPU @@ -3599,7 +3610,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, int
[Intel-gfx] [RFC 31/39] drm/i915: Added debugfs interface to scheduler tuning parameters
From: John Harrison john.c.harri...@intel.com There are various parameters within the scheduler which can be tuned to improve performance, reduce memory footprint, etc. This change adds support for altering these via debugfs. Change-Id: I6c26765269ae7173ff4d3a5c20921eaaca7c36ed For: VIZ-1587 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 113 1 file changed, 113 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 05646fe..028fa8f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -39,6 +39,7 @@ #include intel_ringbuffer.h #include drm/i915_drm.h #include i915_drv.h +#include i915_scheduler.h enum { ACTIVE_LIST, @@ -1123,6 +1124,114 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, i915_next_seqno_get, i915_next_seqno_set, 0x%llx\n); +static int +i915_scheduler_priority_max_get(void *data, u64 *val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev-dev_private; + struct i915_scheduler *scheduler = dev_priv-scheduler; + + *val = (u64) scheduler-priority_level_max; + return 0; +} + +static int +i915_scheduler_priority_max_set(void *data, u64 val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev-dev_private; + struct i915_scheduler *scheduler = dev_priv-scheduler; + + scheduler-priority_level_max = (u32) val; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_scheduler_priority_max_fops, + i915_scheduler_priority_max_get, + i915_scheduler_priority_max_set, + 0x%llx\n); + +static int +i915_scheduler_priority_preempt_get(void *data, u64 *val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev-dev_private; + struct i915_scheduler *scheduler = dev_priv-scheduler; + + *val = (u64) scheduler-priority_level_preempt; + return 0; +} + +static int +i915_scheduler_priority_preempt_set(void *data, u64 val) +{ + struct drm_device *dev = data; + struct 
drm_i915_private *dev_priv = dev-dev_private; + struct i915_scheduler *scheduler = dev_priv-scheduler; + + scheduler-priority_level_preempt = (u32) val; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_scheduler_priority_preempt_fops, + i915_scheduler_priority_preempt_get, + i915_scheduler_priority_preempt_set, + 0x%llx\n); + +static int +i915_scheduler_min_flying_get(void *data, u64 *val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev-dev_private; + struct i915_scheduler *scheduler = dev_priv-scheduler; + + *val = (u64) scheduler-min_flying; + return 0; +} + +static int +i915_scheduler_min_flying_set(void *data, u64 val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev-dev_private; + struct i915_scheduler *scheduler = dev_priv-scheduler; + + scheduler-min_flying = (u32) val; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_scheduler_min_flying_fops, + i915_scheduler_min_flying_get, + i915_scheduler_min_flying_set, + 0x%llx\n); + +static int +i915_scheduler_file_queue_max_get(void *data, u64 *val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev-dev_private; + struct i915_scheduler *scheduler = dev_priv-scheduler; + + *val = (u64) scheduler-file_queue_max; + return 0; +} + +static int +i915_scheduler_file_queue_max_set(void *data, u64 val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev-dev_private; + struct i915_scheduler *scheduler = dev_priv-scheduler; + + scheduler-file_queue_max = (u32) val; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_scheduler_file_queue_max_fops, + i915_scheduler_file_queue_max_get, + i915_scheduler_file_queue_max_set, + 0x%llx\n); + static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_info_node *node = m-private; @@ -5163,6 +5272,10 @@ static const struct i915_debugfs_files { {i915_gem_drop_caches, i915_drop_caches_fops}, {i915_error_state, i915_error_state_fops}, {i915_next_seqno, 
i915_next_seqno_fops}, + {i915_scheduler_priority_max, i915_scheduler_priority_max_fops}, + {i915_scheduler_priority_preempt, i915_scheduler_priority_preempt_fops}, + {i915_scheduler_min_flying, i915_scheduler_min_flying_fops}, +
[Intel-gfx] [RFC 4/9] drm/i915: Removed now redundant parameter to i915_gem_request_completed()
From: John Harrison john.c.harri...@intel.com The change to the implementation of i915_gem_request_completed() means that the lazy coherency flag is no longer used. This can now be removed to simplify the interface. For: VIZ-5190 Signed-off-by: John Harrison john.c.harri...@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 3 +-- drivers/gpu/drm/i915/i915_gem.c | 18 +- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bc817da..b9a92fe 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -602,7 +602,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) i915_gem_request_get_seqno(work-flip_queued_req), dev_priv-next_seqno, ring-get_seqno(ring, true), - i915_gem_request_completed(work-flip_queued_req, true)); + i915_gem_request_completed(work-flip_queued_req)); } else seq_printf(m, Flip not associated with any ring\n); seq_printf(m, Flip queued on frame %d, (was ready on frame %d), now %d\n, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 79d346c..0c7df46 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2239,8 +2239,7 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, struct drm_i915_gem_request **req_out); void i915_gem_request_cancel(struct drm_i915_gem_request *req); -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, - bool lazy_coherency) +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req) { return fence_is_signaled(req-fence); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 888bb72..3970250 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1170,7 +1170,7 @@ static int 
__i915_spin_request(struct drm_i915_gem_request *req) timeout = jiffies + 1; while (!need_resched()) { - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(req)) return 0; if (time_after_eq(jiffies, timeout)) @@ -1178,7 +1178,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *req) cpu_relax_lowlatency(); } - if (i915_gem_request_completed(req, false)) + if (i915_gem_request_completed(req)) return 0; return -EAGAIN; @@ -1222,7 +1222,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, if (list_empty(req-list)) return 0; - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(req)) return 0; timeout_expire = timeout ? @@ -1262,7 +1262,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, break; } - if (i915_gem_request_completed(req, false)) { + if (i915_gem_request_completed(req)) { ret = 0; break; } @@ -2759,7 +2759,7 @@ i915_gem_find_active_request(struct intel_engine_cs *ring) struct drm_i915_gem_request *request; list_for_each_entry(request, ring-request_list, list) { - if (i915_gem_request_completed(request, false)) + if (i915_gem_request_completed(request)) continue; return request; @@ -2902,7 +2902,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) struct drm_i915_gem_request, list); - if (!i915_gem_request_completed(request, true)) + if (!i915_gem_request_completed(request)) break; i915_gem_request_retire(request); @@ -2926,7 +2926,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) } if (unlikely(ring-trace_irq_req -i915_gem_request_completed(ring-trace_irq_req, true))) { +i915_gem_request_completed(ring-trace_irq_req))) { ring-irq_put(ring); i915_gem_request_assign(ring-trace_irq_req, NULL); } @@ -3032,7 +3032,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) if (list_empty(req-list))
[Intel-gfx] [RFC 2/9] android: add sync_fence_create_dma
From: Maarten Lankhorst maarten.lankho...@canonical.com This allows users of dma fences to create a android fence. v2: Added kerneldoc. (Tvrtko Ursulin). Signed-off-by: Maarten Lankhorst maarten.lankho...@canonical.com Signed-off-by: Tvrtko Ursulin tvrtko.ursu...@intel.com Cc: Maarten Lankhorst maarten.lankho...@linux.intel.com Cc: Daniel Vetter dan...@ffwll.ch Cc: Jesse Barnes jbar...@virtuousgeek.org Cc: de...@driverdev.osuosl.org Cc: Riley Andrews riandr...@android.com Cc: Greg Kroah-Hartman gre...@linuxfoundation.org Cc: Arve Hjønnevåg a...@android.com --- drivers/staging/android/sync.c | 13 + drivers/staging/android/sync.h | 12 +++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/staging/android/sync.c b/drivers/staging/android/sync.c index f83e00c..7f0e919 100644 --- a/drivers/staging/android/sync.c +++ b/drivers/staging/android/sync.c @@ -188,7 +188,7 @@ static void fence_check_cb_func(struct fence *f, struct fence_cb *cb) } /* TODO: implement a create which takes more that one sync_pt */ -struct sync_fence *sync_fence_create(const char *name, struct sync_pt *pt) +struct sync_fence *sync_fence_create_dma(const char *name, struct fence *pt) { struct sync_fence *fence; @@ -199,16 +199,21 @@ struct sync_fence *sync_fence_create(const char *name, struct sync_pt *pt) fence-num_fences = 1; atomic_set(fence-status, 1); - fence-cbs[0].sync_pt = pt-base; + fence-cbs[0].sync_pt = pt; fence-cbs[0].fence = fence; - if (fence_add_callback(pt-base, fence-cbs[0].cb, - fence_check_cb_func)) + if (fence_add_callback(pt, fence-cbs[0].cb, fence_check_cb_func)) atomic_dec(fence-status); sync_fence_debug_add(fence); return fence; } +EXPORT_SYMBOL(sync_fence_create_dma); + +struct sync_fence *sync_fence_create(const char *name, struct sync_pt *pt) +{ + return sync_fence_create_dma(name, pt-base); +} EXPORT_SYMBOL(sync_fence_create); struct sync_fence *sync_fence_fdget(int fd) diff --git a/drivers/staging/android/sync.h b/drivers/staging/android/sync.h index 
a21b79f..0f1299e 100644 --- a/drivers/staging/android/sync.h +++ b/drivers/staging/android/sync.h @@ -250,10 +250,20 @@ void sync_pt_free(struct sync_pt *pt); * @pt:sync_pt to add to the fence * * Creates a fence containg @pt. Once this is called, the fence takes - * ownership of @pt. + * a reference on @pt. */ struct sync_fence *sync_fence_create(const char *name, struct sync_pt *pt); +/** + * sync_fence_create_dma() - creates a sync fence from dma-fence + * @name: name of fence to create + * @pt:dma-fence to add to the fence + * + * Creates a fence containg @pt. Once this is called, the fence takes + * a reference on @pt. + */ +struct sync_fence *sync_fence_create_dma(const char *name, struct fence *pt); + /* * API for sync_fence consumers */ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC 1/9] staging/android/sync: Support sync points created from dma-fences
From: Tvrtko Ursulin tvrtko.ursu...@intel.com Debug output assumes all sync points are built on top of Android sync points and when we start creating them from dma-fences will NULL ptr deref unless taught about this. Signed-off-by: Tvrtko Ursulin tvrtko.ursu...@intel.com Cc: Maarten Lankhorst maarten.lankho...@linux.intel.com Cc: de...@driverdev.osuosl.org Cc: Riley Andrews riandr...@android.com Cc: Greg Kroah-Hartman gre...@linuxfoundation.org Cc: Arve Hjønnevåg a...@android.com --- drivers/staging/android/sync_debug.c | 42 +++- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/staging/android/sync_debug.c b/drivers/staging/android/sync_debug.c index 91ed2c4..f45d13c 100644 --- a/drivers/staging/android/sync_debug.c +++ b/drivers/staging/android/sync_debug.c @@ -82,36 +82,42 @@ static const char *sync_status_str(int status) return error; } -static void sync_print_pt(struct seq_file *s, struct sync_pt *pt, bool fence) +static void sync_print_pt(struct seq_file *s, struct fence *pt, bool fence) { int status = 1; - struct sync_timeline *parent = sync_pt_parent(pt); - if (fence_is_signaled_locked(pt-base)) - status = pt-base.status; + if (fence_is_signaled_locked(pt)) + status = pt-status; seq_printf(s, %s%spt %s, - fence ? parent-name : , + fence pt-ops-get_timeline_name ? + pt-ops-get_timeline_name(pt) : , fence ? 
_ : , sync_status_str(status)); if (status = 0) { struct timespec64 ts64 = - ktime_to_timespec64(pt-base.timestamp); + ktime_to_timespec64(pt-timestamp); seq_printf(s, @%lld.%09ld, (s64)ts64.tv_sec, ts64.tv_nsec); } - if (parent-ops-timeline_value_str - parent-ops-pt_value_str) { + if ((!fence || pt-ops-timeline_value_str) + pt-ops-fence_value_str) { char value[64]; + bool success; - parent-ops-pt_value_str(pt, value, sizeof(value)); - seq_printf(s, : %s, value); - if (fence) { - parent-ops-timeline_value_str(parent, value, - sizeof(value)); - seq_printf(s, / %s, value); + pt-ops-fence_value_str(pt, value, sizeof(value)); + success = strlen(value); + + if (success) + seq_printf(s, : %s, value); + + if (success fence) { + pt-ops-timeline_value_str(pt, value, sizeof(value)); + + if (strlen(value)) + seq_printf(s, / %s, value); } } @@ -138,7 +144,7 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) list_for_each(pos, obj-child_list_head) { struct sync_pt *pt = container_of(pos, struct sync_pt, child_list); - sync_print_pt(s, pt, false); + sync_print_pt(s, pt-base, false); } spin_unlock_irqrestore(obj-child_list_lock, flags); } @@ -153,11 +159,7 @@ static void sync_print_fence(struct seq_file *s, struct sync_fence *fence) sync_status_str(atomic_read(fence-status))); for (i = 0; i fence-num_fences; ++i) { - struct sync_pt *pt = - container_of(fence-cbs[i].sync_pt, -struct sync_pt, base); - - sync_print_pt(s, pt, true); + sync_print_pt(s, fence-cbs[i].sync_pt, true); } spin_lock_irqsave(fence-wq.lock, flags); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC 1/9] staging/android/sync: Support sync points created from dma-fences
On 07/17/2015 03:31 PM, john.c.harri...@intel.com wrote: From: Tvrtko Ursulin tvrtko.ursu...@intel.com Debug output assumes all sync points are built on top of Android sync points and when we start creating them from dma-fences will NULL ptr deref unless taught about this. This is Maarten's code, just the patch had a troubled history where it got misplaced, forgotten and then resurrected but with the commit message lost. Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] igt/kms_plane_scaling : plane scaling enhancement and plane support for bxt
On 17 July 2015 at 09:34, Nabendu Maiti nabendu.bikash.ma...@intel.com wrote: 1) Added more overlay plane support for BXT. 2) Added and enhanced scaler test cases for additional planes. Please split the various separate changes in different patches. The i-g-t tag should also appear in the subject prefix rather than the subject tag. (i.e. use --subject-prefix=PATCH i-g-t or git config format.subjectprefix PATCH i-g-t). Signed-off-by: Nabendu Maiti nabendu.bikash.ma...@intel.com --- lib/igt_fb.c | 40 lib/igt_fb.h | 1 + lib/igt_kms.c | 1 + lib/igt_kms.h | 3 +- tests/kms_plane_scaling.c | 79 --- 5 files changed, 118 insertions(+), 6 deletions(-) diff --git a/lib/igt_fb.c b/lib/igt_fb.c index 134dbd2..e61b762 100644 --- a/lib/igt_fb.c +++ b/lib/igt_fb.c @@ -393,6 +393,46 @@ void igt_paint_image(cairo_t *cr, const char *filename, } /** + * igt_paint_cross_ruler: + * @cr: cairo drawing context + * @dst_width: width of the horizontal ruler + * @dst_height: height of the vertical ruler + * + * This function can be used to draw a cross ruler on a frame buffer. + */ +void +igt_paint_cross_ruler(cairo_t *cr, uint16_t w, uint16_t h) Other cairo functions use int rather than uint16_t for width and height, so int should be sufficient here too, at least for consistency. 
+{ + + uint16_t i; + + /* Paint corner markers */ + paint_marker(cr, 0, 0); + paint_marker(cr, w, 0); + paint_marker(cr, 0, h); + paint_marker(cr, w, h); + + + cairo_move_to(cr, w/2, 0); + cairo_line_to(cr, w/2, h); + + cairo_set_source_rgb(cr, 4, 1, 10); + cairo_move_to(cr, 0, h/2); + cairo_line_to(cr, w, h/2 ); + cairo_stroke(cr); + + cairo_set_source_rgb(cr, 2, 5, 1); + cairo_set_line_width(cr, 2); + cairo_stroke(cr); + cairo_stroke_preserve(cr); + for (i = 0; i w; i +=200) + paint_marker(cr, i, h/2); + + for (i = 0; i h; i +=200) + paint_marker(cr, w/2, i); +} + +/** * igt_create_fb_with_bo_size: * @fd: open i915 drm file descriptor * @width: width of the framebuffer in pixel diff --git a/lib/igt_fb.h b/lib/igt_fb.h index a07acd2..cf6e7e3 100644 --- a/lib/igt_fb.h +++ b/lib/igt_fb.h @@ -98,6 +98,7 @@ void igt_write_fb_to_png(int fd, struct igt_fb *fb, const char *filename); int igt_cairo_printf_line(cairo_t *cr, enum igt_text_align align, double yspacing, const char *fmt, ...) __attribute__((format (printf, 4, 5))); +void igt_paint_cross_ruler(cairo_t *cr, uint16_t w, uint16_t h); /* helpers to handle drm fourcc codes */ uint32_t igt_bpp_depth_to_drm_format(int bpp, int depth); diff --git a/lib/igt_kms.c b/lib/igt_kms.c index 0bb16b4..781ffa5 100644 --- a/lib/igt_kms.c +++ b/lib/igt_kms.c @@ -213,6 +213,7 @@ const char *kmstest_plane_name(enum igt_plane plane) [IGT_PLANE_1] = plane1, [IGT_PLANE_2] = plane2, [IGT_PLANE_3] = plane3, + [IGT_PLANE_4] = plane4, [IGT_PLANE_CURSOR] = cursor, }; diff --git a/lib/igt_kms.h b/lib/igt_kms.h index 09c08aa..14c8b28 100644 --- a/lib/igt_kms.h +++ b/lib/igt_kms.h @@ -53,6 +53,7 @@ enum igt_plane { IGT_PLANE_PRIMARY = IGT_PLANE_1, IGT_PLANE_2, IGT_PLANE_3, +IGT_PLANE_4, IGT_PLANE_CURSOR, }; @@ -205,7 +206,7 @@ struct igt_pipe { igt_display_t *display; enum pipe pipe; bool enabled; -#define IGT_MAX_PLANES 4 +#define IGT_MAX_PLANES 5 int n_planes; igt_plane_t planes[IGT_MAX_PLANES]; uint64_t background; /* Background color MSB 
BGR 16bpc LSB */ diff --git a/tests/kms_plane_scaling.c b/tests/kms_plane_scaling.c index 00db5cb..8e3d559 100644 --- a/tests/kms_plane_scaling.c +++ b/tests/kms_plane_scaling.c @@ -23,7 +23,7 @@ */ #include math.h - +#include cairo.h cairo is already included via igt_kms.h and igt_fb.h. #include drmtest.h #include igt_debugfs.h #include igt_kms.h @@ -48,9 +48,11 @@ typedef struct { struct igt_fb fb1; struct igt_fb fb2; struct igt_fb fb3; + struct igt_fb fb4; int fb_id1; int fb_id2; int fb_id3; + int fb_id4; igt_plane_t *plane1; igt_plane_t *plane2; @@ -61,6 +63,22 @@ typedef struct { #define FILE_NAME 1080p-left.png static void +paint_plane_ID(data_t *d, struct igt_fb *fb, igt_plane_t *plane) +{ + cairo_t *cr; + + cr = igt_get_cairo_ctx(d-drm_fd, fb); + cairo_move_to(cr, (fb-width/5), + (fb-height / 5)); + cairo_set_font_size(cr, 25); + igt_cairo_printf_line(cr, align_hcenter, 10, PIPE:PLANE:); +
Re: [Intel-gfx] [PATCH i-g-t v3] Android.mk: Disable tools that do not build for android
On 17 July 2015 at 10:15, Derek Morton derek.j.mor...@intel.com wrote: Disable the tools / demo code that do not currently build for android until they can be fixed. Affected tools / demos intel_display_crc intel_sprite_on v2: intel_display_crc compiled conditionally on ANDROID_HAS_CAIRO flag. v3: removed intel_reg from the skip list as Thomas has prepared a patch to fix it for Android. Signed-off-by: Derek Morton derek.j.mor...@intel.com --- Android.mk | 2 +- tools/Android.mk | 4 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Android.mk b/Android.mk index 1ab3e64..681d114 100644 --- a/Android.mk +++ b/Android.mk @@ -1,2 +1,2 @@ -include $(call all-named-subdir-makefiles, lib tests tools benchmarks demos) +include $(call all-named-subdir-makefiles, lib tests tools benchmarks) This essentially just disables building intel_sprite_on. Does the comment in commit 6999b70 no longer apply? diff --git a/tools/Android.mk b/tools/Android.mk index 39f4512..b5f8008 100644 --- a/tools/Android.mk +++ b/tools/Android.mk @@ -41,6 +41,10 @@ skip_tools_list := \ intel_vga_read \ intel_vga_write +ifneq (${ANDROID_HAS_CAIRO}, 1) +skip_tools_list += intel_display_crc +endif + tools_list := $(filter-out $(skip_tools_list),$(bin_PROGRAMS)) $(foreach item,$(tools_list),$(eval $(call add_tool,$(item -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH V2] drm/i915/skl+: Add YUV pixel format in Capability list
GEN = 9 supports YUV format for all planes, but it's not exported in Capability list of primary plane. Add YUV formats in skl_primary_formats list. Don't rely on fb-bits_per_pixel as intel_framebuffer_init is not filling bits_per_pixel field of fb-struct for YUV pixel format. This leads to divide by zero error during watermark calculation. V2: Don't break NV12 case. Signed-off-by: Kumar, Mahesh mahesh1.ku...@intel.com Cc: Konduru, Chandra chandra.kond...@intel.com --- IGT changes made for testcase will be sent in separate patch. drivers/gpu/drm/i915/intel_display.c | 4 drivers/gpu/drm/i915/intel_pm.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index af0bcfe..d31704a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -72,6 +72,10 @@ static const uint32_t skl_primary_formats[] = { DRM_FORMAT_ABGR, DRM_FORMAT_XRGB2101010, DRM_FORMAT_XBGR2101010, + DRM_FORMAT_YUYV, + DRM_FORMAT_YVYU, + DRM_FORMAT_UYVY, + DRM_FORMAT_VYUY, }; /* Cursor formats */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5eeddc9..5768f8c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3164,7 +3164,8 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc, if (fb) { p-plane[0].enabled = true; p-plane[0].bytes_per_pixel = fb-pixel_format == DRM_FORMAT_NV12 ? - drm_format_plane_cpp(fb-pixel_format, 1) : fb-bits_per_pixel / 8; + drm_format_plane_cpp(fb-pixel_format, 1) : + drm_format_plane_cpp(fb-pixel_format, 0); p-plane[0].y_bytes_per_pixel = fb-pixel_format == DRM_FORMAT_NV12 ? drm_format_plane_cpp(fb-pixel_format, 0) : 0; p-plane[0].tiling = fb-modifier[0]; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3] drm/i915: Use two 32bit reads for select 64bit REG_READ ioctls
On Thu, Jul 16, 2015 at 12:37:56PM +0100, Chris Wilson wrote: Since the hardware sometimes mysteriously totally flummoxes the 64bit read of a 64bit register when read using a single instruction, split the read into two instructions. Since the read here is of automatically incrementing timestamp counters, we also have to be very careful in order to make sure that it does not increment between the two instructions. However, since userspace tried to workaround this issue and so enshrined this ABI for a broken hardware read and in the process neglected that the read only fails in some environments, we have to introduce a new uABI flag for userspace to request the 2x32 bit accurate read of the timestamp. v2: Fix alignment check and include details of the workaround for userspace. Reported-by: Karol Herbst freedesk...@karolherbst.de Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91317 Testcase: igt/gem_reg_read Tested-by: Michał Winiarski michal.winiar...@intel.com Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk Cc: Michał Winiarski michal.winiar...@intel.com Cc: sta...@vger.kernel.org --- drivers/gpu/drm/i915/intel_uncore.c | 26 +++--- include/uapi/drm/i915_drm.h | 8 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 2c477663d378..eb244b57b3fd 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1310,10 +1310,12 @@ int i915_reg_read_ioctl(struct drm_device *dev, struct drm_i915_private *dev_priv = dev-dev_private; struct drm_i915_reg_read *reg = data; struct register_whitelist const *entry = whitelist; + unsigned size; + u64 offset; int i, ret = 0; for (i = 0; i ARRAY_SIZE(whitelist); i++, entry++) { - if (entry-offset == reg-offset + if (entry-offset == (reg-offset -entry-size) (1 INTEL_INFO(dev)-gen entry-gen_bitmask)) break; } @@ -1321,23 +1323,33 @@ int i915_reg_read_ioctl(struct drm_device *dev, if (i == 
ARRAY_SIZE(whitelist)) return -EINVAL; + /* We use the low bits to encode extra flags as the register should + * be naturally aligned (and those that are not so aligned merely + * limit the available flags for that register). + */ + offset = entry-offset; + size = entry-size; + size |= reg-offset ^ offset; + intel_runtime_pm_get(dev_priv); - switch (entry-size) { + switch (size) { + case 8 | 1: + reg-val = I915_READ64_2x32(offset, offset+4); + break; case 8: - reg-val = I915_READ64(reg-offset); + reg-val = I915_READ64(offset); break; case 4: - reg-val = I915_READ(reg-offset); + reg-val = I915_READ(offset); break; case 2: - reg-val = I915_READ16(reg-offset); + reg-val = I915_READ16(offset); break; case 1: - reg-val = I915_READ8(reg-offset); + reg-val = I915_READ8(offset); break; default: - MISSING_CASE(entry-size); ret = -EINVAL; goto out; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index b0f82ddab987..83f60f01dca2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1087,6 +1087,14 @@ struct drm_i915_reg_read { __u64 offset; __u64 val; /* Return value */ }; +/* Known registers: + * + * Render engine timestamp - 0x2358 + 64bit - gen7+ + * - Note this register returns an invalid value if using the default + * single instruction 8byte read, in order to workaround that use + * offset (0x2538 | 1) instead. + * + */ struct drm_i915_reset_stats { __u32 ctx_id; -- 2.1.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/2] drm/i915/skl: Drop the preliminary_hw_support flag
On Thu, Jul 16, 2015 at 05:08:09PM +0100, Damien Lespiau wrote: Time to light a candle and remove the preliminary_hw_support flag. Signed-off-by: Damien Lespiau damien.lesp...@intel.com Both applied to dinq, thanks. -Daniel --- drivers/gpu/drm/i915/i915_drv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index e44dc0d..c8daa2d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -356,7 +356,6 @@ static const struct intel_device_info intel_cherryview_info = { }; static const struct intel_device_info intel_skylake_info = { - .is_preliminary = 1, .is_skylake = 1, .gen = 9, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, @@ -369,7 +368,6 @@ static const struct intel_device_info intel_skylake_info = { }; static const struct intel_device_info intel_skylake_gt3_info = { - .is_preliminary = 1, .is_skylake = 1, .gen = 9, .num_pipes = 3, .need_gfx_hws = 1, .has_hotplug = 1, -- 2.1.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH i-g-t v3] Android.mk: Disable tools that do not build for android
-Original Message- From: Morton, Derek J Sent: Friday, July 17, 2015 9:44 AM To: intel-gfx@lists.freedesktop.org Cc: Wood, Thomas; Morton, Derek J Subject: [PATCH i-g-t v3] Android.mk: Disable tools that do not build for android Disable the tools / demo code that do not currently build for android until they can be fixed. Affected tools / demos intel_display_crc intel_sprite_on v2: intel_display_crc compiled conditionally on ANDROID_HAS_CAIRO flag. v3: removed intel_reg from the skip list as Thomas has prepared a patch to fix it for Android. Signed-off-by: Derek Morton derek.j.mor...@intel.com --- Android.mk | 2 +- tools/Android.mk | 5 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Android.mk b/Android.mk index 1ab3e64..681d114 100644 --- a/Android.mk +++ b/Android.mk @@ -1,2 +1,2 @@ -include $(call all-named-subdir-makefiles, lib tests tools benchmarks demos) +include $(call all-named-subdir-makefiles, lib tests tools benchmarks) diff --git a/tools/Android.mk b/tools/Android.mk index 39f4512..4be0032 100644 --- a/tools/Android.mk +++ b/tools/Android.mk @@ -37,10 +37,15 @@ endef skip_tools_list := \ intel_framebuffer_dump \ +intel_reg \ That's weird, git diff HEAD^ shows this being removed but git format-patch HEAD^ has left it in. intel_reg_dumper \ intel_vga_read \ intel_vga_write +ifneq (${ANDROID_HAS_CAIRO}, 1) +skip_tools_list += intel_display_crc endif + tools_list := $(filter-out $(skip_tools_list),$(bin_PROGRAMS)) $(foreach item,$(tools_list),$(eval $(call add_tool,$(item -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/2] drm/i915: Don't reprobe on resume
On Thu, Jul 16, 2015 at 04:32:44PM +0100, Chris Wilson wrote: On Thu, Jul 16, 2015 at 04:47:51PM +0200, Daniel Vetter wrote: If we don't force the connector state to unknown there's no reason any more to force a reprobe. Also no other driver bothers with this, so probably it's not required - userspace handles lid/resume events through other channels already. No, we don't. We don't synthesize any events at all for changing connectors whilst suspended and userspace doesn't know about being suspended. One night of sleep does wonders ;-) I agree the patch is crap and my thinking that it's been broken since ages is also: We start the poll helper right away and that will take care of all the non-hpd ports. It's all fine as-is. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH i-g-t v3] Android.mk: Disable tools that do not build for android
Disable the tools / demo code that do not currently build for android until they can be fixed. Affected tools / demos intel_display_crc intel_sprite_on v2: intel_display_crc compiled conditionally on ANDROID_HAS_CAIRO flag. v3: removed intel_reg from the skip list as Thomas has prepared a patch to fix it for Android. Signed-off-by: Derek Morton derek.j.mor...@intel.com --- Android.mk | 2 +- tools/Android.mk | 4 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Android.mk b/Android.mk index 1ab3e64..681d114 100644 --- a/Android.mk +++ b/Android.mk @@ -1,2 +1,2 @@ -include $(call all-named-subdir-makefiles, lib tests tools benchmarks demos) +include $(call all-named-subdir-makefiles, lib tests tools benchmarks) diff --git a/tools/Android.mk b/tools/Android.mk index 39f4512..b5f8008 100644 --- a/tools/Android.mk +++ b/tools/Android.mk @@ -41,6 +41,10 @@ skip_tools_list := \ intel_vga_read \ intel_vga_write +ifneq (${ANDROID_HAS_CAIRO}, 1) +skip_tools_list += intel_display_crc +endif + tools_list := $(filter-out $(skip_tools_list),$(bin_PROGRAMS)) $(foreach item,$(tools_list),$(eval $(call add_tool,$(item -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915/bxt: WA for swapped HPD pins in A stepping
As per bspec, on BXT A0/A1, sw needs to activate DDIA HPD logic and interrupts to check the external panel connection and DDIC HPD logic for edp panel. v2: For DP, irq_port is used to determine the encoder instead of hpd_pin and removing the edp HPD logic because port A HPD is not present(Imre) Signed-off-by: Sonika Jindal sonika.jin...@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 10 +- drivers/gpu/drm/i915/intel_hdmi.c |9 - 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index e2c6f73..777e3a3 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -3225,7 +3225,15 @@ void intel_ddi_init(struct drm_device *dev, enum port port) goto err; intel_dig_port-hpd_pulse = intel_dp_hpd_pulse; - dev_priv-hotplug.irq_port[port] = intel_dig_port; + /* +* On BXT A0/A1, sw needs to activate DDIA HPD logic and +* interrupts to check the external panel connection. +*/ + if (IS_BROXTON(dev_priv) (INTEL_REVID(dev) BXT_REVID_B0) + port == PORT_B) + dev_priv-hotplug.irq_port[PORT_A] = intel_dig_port; + else + dev_priv-hotplug.irq_port[port] = intel_dig_port; } /* In theory we don't need the encoder-type check, but leave it just in diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 70bad5b..94fa716 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1973,7 +1973,14 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, intel_hdmi-ddc_bus = GMBUS_PIN_1_BXT; else intel_hdmi-ddc_bus = GMBUS_PIN_DPB; - intel_encoder-hpd_pin = HPD_PORT_B; + /* +* On BXT A0/A1, sw needs to activate DDIA HPD logic and +* interrupts to check the external panel connection. 
+*/ + if (IS_BROXTON(dev_priv) (INTEL_REVID(dev) BXT_REVID_B0)) + intel_encoder-hpd_pin = HPD_PORT_A; + else + intel_encoder-hpd_pin = HPD_PORT_B; break; case PORT_C: if (IS_BROXTON(dev_priv)) -- 1.7.10.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH i-g-t v3] Android.mk: Disable tools that do not build for android
Disable the tools / demo code that do not currently build for android until they can be fixed. Affected tools / demos intel_display_crc intel_sprite_on v2: intel_display_crc compiled conditionally on ANDROID_HAS_CAIRO flag. v3: removed intel_reg from the skip list as Thomas has prepared a patch to fix it for Android. Signed-off-by: Derek Morton derek.j.mor...@intel.com --- Android.mk | 2 +- tools/Android.mk | 5 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Android.mk b/Android.mk index 1ab3e64..681d114 100644 --- a/Android.mk +++ b/Android.mk @@ -1,2 +1,2 @@ -include $(call all-named-subdir-makefiles, lib tests tools benchmarks demos) +include $(call all-named-subdir-makefiles, lib tests tools benchmarks) diff --git a/tools/Android.mk b/tools/Android.mk index 39f4512..4be0032 100644 --- a/tools/Android.mk +++ b/tools/Android.mk @@ -37,10 +37,15 @@ endef skip_tools_list := \ intel_framebuffer_dump \ +intel_reg \ intel_reg_dumper \ intel_vga_read \ intel_vga_write +ifneq (${ANDROID_HAS_CAIRO}, 1) +skip_tools_list += intel_display_crc +endif + tools_list := $(filter-out $(skip_tools_list),$(bin_PROGRAMS)) $(foreach item,$(tools_list),$(eval $(call add_tool,$(item -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Fix divide by zero on watermark update
On Thu, Jul 16, 2015 at 01:43:15PM -0300, Paulo Zanoni wrote: 2015-07-16 13:36 GMT-03:00 Mika Kuoppala mika.kuopp...@linux.intel.com: Fix divide by zero if we end up updating the watermarks with zero dotclock. This is a stop gap measure to allow module load in cases where our state keeping fails. v2: WARN_ON added (Paulo) Since we're not hiding the problem (due to the WARN_ON) and the patch improves the current situation: Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com Queued for -next, thanks for the patch. -Daniel Cc: Paulo Zanoni przan...@gmail.com Cc: Damien Lespiau damien.lesp...@intel.com Signed-off-by: Mika Kuoppala mika.kuopp...@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5eeddc9..0d3e014 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3316,8 +3316,10 @@ skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p) if (!to_intel_crtc(crtc)-active) return 0; - return DIV_ROUND_UP(8 * p-pipe_htotal * 1000, p-pixel_rate); + if (WARN_ON(p-pixel_rate == 0)) + return 0; + return DIV_ROUND_UP(8 * p-pipe_htotal * 1000, p-pixel_rate); } static void skl_compute_transition_wm(struct drm_crtc *crtc, -- 2.1.4 -- Paulo Zanoni ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] igt/kms_plane_scaling : plane scaling enhancement and plane support for bxt
1) Added more overlay plane support for BXT. 2) Added and enhanced scaler test cases for additional planes. Signed-off-by: Nabendu Maiti nabendu.bikash.ma...@intel.com --- lib/igt_fb.c | 40 lib/igt_fb.h | 1 + lib/igt_kms.c | 1 + lib/igt_kms.h | 3 +- tests/kms_plane_scaling.c | 79 --- 5 files changed, 118 insertions(+), 6 deletions(-) diff --git a/lib/igt_fb.c b/lib/igt_fb.c index 134dbd2..e61b762 100644 --- a/lib/igt_fb.c +++ b/lib/igt_fb.c @@ -393,6 +393,46 @@ void igt_paint_image(cairo_t *cr, const char *filename, } /** + * igt_paint_cross_ruler: + * @cr: cairo drawing context + * @dst_width: width of the horizontal ruler + * @dst_height: height of the vertical ruler + * + * This function can be used to draw a cross ruler on a frame buffer. + */ +void +igt_paint_cross_ruler(cairo_t *cr, uint16_t w, uint16_t h) +{ + + uint16_t i; + + /* Paint corner markers */ + paint_marker(cr, 0, 0); + paint_marker(cr, w, 0); + paint_marker(cr, 0, h); + paint_marker(cr, w, h); + + + cairo_move_to(cr, w/2, 0); + cairo_line_to(cr, w/2, h); + + cairo_set_source_rgb(cr, 4, 1, 10); + cairo_move_to(cr, 0, h/2); + cairo_line_to(cr, w, h/2 ); + cairo_stroke(cr); + + cairo_set_source_rgb(cr, 2, 5, 1); + cairo_set_line_width(cr, 2); + cairo_stroke(cr); + cairo_stroke_preserve(cr); + for (i = 0; i w; i +=200) + paint_marker(cr, i, h/2); + + for (i = 0; i h; i +=200) + paint_marker(cr, w/2, i); +} + +/** * igt_create_fb_with_bo_size: * @fd: open i915 drm file descriptor * @width: width of the framebuffer in pixel diff --git a/lib/igt_fb.h b/lib/igt_fb.h index a07acd2..cf6e7e3 100644 --- a/lib/igt_fb.h +++ b/lib/igt_fb.h @@ -98,6 +98,7 @@ void igt_write_fb_to_png(int fd, struct igt_fb *fb, const char *filename); int igt_cairo_printf_line(cairo_t *cr, enum igt_text_align align, double yspacing, const char *fmt, ...) 
__attribute__((format (printf, 4, 5))); +void igt_paint_cross_ruler(cairo_t *cr, uint16_t w, uint16_t h); /* helpers to handle drm fourcc codes */ uint32_t igt_bpp_depth_to_drm_format(int bpp, int depth); diff --git a/lib/igt_kms.c b/lib/igt_kms.c index 0bb16b4..781ffa5 100644 --- a/lib/igt_kms.c +++ b/lib/igt_kms.c @@ -213,6 +213,7 @@ const char *kmstest_plane_name(enum igt_plane plane) [IGT_PLANE_1] = plane1, [IGT_PLANE_2] = plane2, [IGT_PLANE_3] = plane3, + [IGT_PLANE_4] = plane4, [IGT_PLANE_CURSOR] = cursor, }; diff --git a/lib/igt_kms.h b/lib/igt_kms.h index 09c08aa..14c8b28 100644 --- a/lib/igt_kms.h +++ b/lib/igt_kms.h @@ -53,6 +53,7 @@ enum igt_plane { IGT_PLANE_PRIMARY = IGT_PLANE_1, IGT_PLANE_2, IGT_PLANE_3, +IGT_PLANE_4, IGT_PLANE_CURSOR, }; @@ -205,7 +206,7 @@ struct igt_pipe { igt_display_t *display; enum pipe pipe; bool enabled; -#define IGT_MAX_PLANES 4 +#define IGT_MAX_PLANES 5 int n_planes; igt_plane_t planes[IGT_MAX_PLANES]; uint64_t background; /* Background color MSB BGR 16bpc LSB */ diff --git a/tests/kms_plane_scaling.c b/tests/kms_plane_scaling.c index 00db5cb..8e3d559 100644 --- a/tests/kms_plane_scaling.c +++ b/tests/kms_plane_scaling.c @@ -23,7 +23,7 @@ */ #include math.h - +#include cairo.h #include drmtest.h #include igt_debugfs.h #include igt_kms.h @@ -48,9 +48,11 @@ typedef struct { struct igt_fb fb1; struct igt_fb fb2; struct igt_fb fb3; + struct igt_fb fb4; int fb_id1; int fb_id2; int fb_id3; + int fb_id4; igt_plane_t *plane1; igt_plane_t *plane2; @@ -61,6 +63,22 @@ typedef struct { #define FILE_NAME 1080p-left.png static void +paint_plane_ID(data_t *d, struct igt_fb *fb, igt_plane_t *plane) +{ + cairo_t *cr; + + cr = igt_get_cairo_ctx(d-drm_fd, fb); + cairo_move_to(cr, (fb-width/5), + (fb-height / 5)); + cairo_set_font_size(cr, 25); + igt_cairo_printf_line(cr, align_hcenter, 10, PIPE:PLANE:); + cairo_set_font_size(cr, 30); + igt_cairo_printf_line(cr, align_hcenter, 40, %d:%d, + plane-pipe-pipe, plane-index); + cairo_destroy(cr); +} 
+ +static void paint_color(data_t *d, struct igt_fb *fb, uint16_t w, uint16_t h) { cairo_t *cr; @@ -71,12 +89,14 @@ paint_color(data_t *d, struct igt_fb *fb, uint16_t w, uint16_t h) } static void -paint_image(data_t *d, struct igt_fb *fb, uint16_t w, uint16_t h) +paint_image(const char *filename, data_t *d, struct igt_fb *fb, + uint16_t w, uint16_t h) { cairo_t *cr; cr = igt_get_cairo_ctx(d-drm_fd, fb); -
Re: [Intel-gfx] [RFC] drm/i915/gtt: Allow >= 4GB offsets in X86_32
On 07/16/2015 05:18 PM, Michel Thierry wrote: Commit c44ef60e4370 (drm/i915/gtt: Allow >= 4GB sizes for vm.) took care of most of these changes, but i915_gem_obj_offset still returned an unsigned long, which is only 4 bytes long in 32-bit kernels. Change return type (and other related offset variables) to u64. Since Global GTT is always limited to 4GB, this change is not required in i915_gem_obj_ggtt_offset. Although in another patch dealing with GGTT I was asked to use u64 explicitly so how to make sure we get some consistency in this area? Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v1 0/4] Add Pooled EU support to BXT
This patches adds support to enable Pooled EU feature in BXT. This series has a dependency with Patch1 from Mika hence sending it as part of the series (already reviewed). Patch2 - adds a framework to extend the golden context batch through which we can add Gen based commands to enable specific features, in this case it is used to enabled Pooled EU (Patch3) This is on the idea suggested by Chris Wilson to send two batches instead of the previous approach of patching the binary data which is unnecessarily complicated (http://www.spinics.net/lists/intel-gfx/msg71498.html). Patch4 - option for the userspace to query it's availability. Arun Siluvery (3): drm/i915: Add provision to extend Golden context batch drm/i915:bxt: Enable Pooled EU support drm/i915/bxt: Add get_param to query Pooled EU availability Mika Kuoppala (1): drm/i915: Do kunmap if renderstate parsing fails drivers/gpu/drm/i915/i915_dma.c | 3 ++ drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 5 ++- drivers/gpu/drm/i915/i915_gem_render_state.c | 52 ++-- drivers/gpu/drm/i915/i915_gem_render_state.h | 2 ++ drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 6 include/uapi/drm/i915_drm.h | 1 + 8 files changed, 69 insertions(+), 3 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v1 4/4] drm/i915/bxt: Add get_param to query Pooled EU availability
User space clients need to know when the pooled EU feature is present and enabled on the hardware so that they can adapt work submissions. Create a new device info flag for this purpose, and create a new GETPARAM entry to allow user space to query its setting. Set has_pooled_eu to true in the Broxton static device info - Broxton supports the feature in hardware and the driver will enable it by default. Signed-off-by: Jeff McGee jeff.mc...@intel.com Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 5 - include/uapi/drm/i915_drm.h | 1 + 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 5e63076..6c31beb 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_RESOURCE_STREAMER: value = HAS_RESOURCE_STREAMER(dev); break; + case I915_PARAM_HAS_POOLED_EU: + value = HAS_POOLED_EU(dev); + break; default: DRM_DEBUG(Unknown parameter %d\n, param-param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index e44dc0d..213f74d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -389,6 +389,7 @@ static const struct intel_device_info intel_broxton_info = { .num_pipes = 3, .has_ddi = 1, .has_fbc = 1, + .has_pooled_eu = 1, GEN_DEFAULT_PIPEOFFSETS, IVB_CURSOR_OFFSETS, }; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 768d1db..32850a8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -775,7 +775,8 @@ struct intel_csr { func(supports_tv) sep \ func(has_llc) sep \ func(has_ddi) sep \ - func(has_fpga_dbg) + func(has_fpga_dbg) sep \ + func(has_pooled_eu) #define DEFINE_FLAG(name) u8 name:1 #define 
SEP_SEMICOLON ; @@ -2549,6 +2550,8 @@ struct drm_i915_cmd_table { #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \ INTEL_INFO(dev)-gen = 8) +#define HAS_POOLED_EU(dev) (INTEL_INFO(dev)-has_pooled_eu) + #define INTEL_PCH_DEVICE_ID_MASK 0xff00 #define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 #define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00 diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e7c29f1..9649577 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_EU_TOTAL 34 #define I915_PARAM_HAS_GPU_RESET35 #define I915_PARAM_HAS_RESOURCE_STREAMER 36 +#define I915_PARAM_HAS_POOLED_EU 37 typedef struct drm_i915_getparam { int param; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v1 1/4] drm/i915: Do kunmap if renderstate parsing fails
From: Mika Kuoppala mika.kuopp...@linux.intel.com Kunmap the renderstate page on error path. Reviewed-by: Arun Siluvery arun.siluv...@linux.intel.com Signed-off-by: Mika Kuoppala mika.kuopp...@intel.com --- drivers/gpu/drm/i915/i915_gem_render_state.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index a0201fc..b6492fe 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -96,8 +96,10 @@ static int render_state_setup(struct render_state *so) s = lower_32_bits(r); if (so-gen = 8) { if (i + 1 = rodata-batch_items || - rodata-batch[i + 1] != 0) - return -EINVAL; + rodata-batch[i + 1] != 0) { + ret = -EINVAL; + goto err_out; + } d[i++] = s; s = upper_32_bits(r); @@ -120,6 +122,10 @@ static int render_state_setup(struct render_state *so) } return 0; + +err_out: + kunmap(page); + return ret; } void i915_gem_render_state_fini(struct render_state *so) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH i-g-t v3] Android.mk: Disable tools that do not build for android
-Original Message- From: Thomas Wood [mailto:thomas.w...@intel.com] Sent: Friday, July 17, 2015 3:18 PM To: Morton, Derek J Cc: Intel Graphics Development Subject: Re: [PATCH i-g-t v3] Android.mk: Disable tools that do not build for android On 17 July 2015 at 10:15, Derek Morton derek.j.mor...@intel.com wrote: Disable the tools / demo code that do not currently build for android until they can be fixed. Affected tools / demos intel_display_crc intel_sprite_on v2: intel_display_crc compiled conditionally on ANDROID_HAS_CAIRO flag. v3: removed intel_reg from the skip list as Thomas has prepared a patch to fix it for Android. Signed-off-by: Derek Morton derek.j.mor...@intel.com --- Android.mk | 2 +- tools/Android.mk | 4 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Android.mk b/Android.mk index 1ab3e64..681d114 100644 --- a/Android.mk +++ b/Android.mk @@ -1,2 +1,2 @@ -include $(call all-named-subdir-makefiles, lib tests tools benchmarks demos) +include $(call all-named-subdir-makefiles, lib tests tools +benchmarks) This essentially just disables building intel_sprite_on. Does the comment in commit 6999b70 no longer apply? With this patch the IGT automatic build test would not need to apply the patch referred to in commit 6999b70. That local patch would still be required by anyone wishing to build intel_sprite_on for android. This patch is aimed at reducing friction by making the vanilla IGT code build for android as the local patch referred to is not considered upstreamable. 
//Derek diff --git a/tools/Android.mk b/tools/Android.mk index 39f4512..b5f8008 100644 --- a/tools/Android.mk +++ b/tools/Android.mk @@ -41,6 +41,10 @@ skip_tools_list := \ intel_vga_read \ intel_vga_write +ifneq (${ANDROID_HAS_CAIRO}, 1) +skip_tools_list += intel_display_crc endif + tools_list := $(filter-out $(skip_tools_list),$(bin_PROGRAMS)) $(foreach item,$(tools_list),$(eval $(call add_tool,$(item -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v1 3/4] drm/i915:bxt: Enable Pooled EU support
This mode allows to assign EUs to pools. The command to enable this mode is sent in auxiliary golden context batch as this is only issued once with each context initialization. Thanks to Mika for the preliminary review. Cc: Mika Kuoppala mika.kuopp...@intel.com Cc: Chris Wilson ch...@chris-wilson.co.uk Cc: Armin Reese armin.c.re...@intel.com Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_render_state.c | 15 +++ drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index b86e382..a41a1b6 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -84,6 +84,7 @@ free_gem: static int render_state_setup(struct render_state *so) { + struct drm_device *dev = so-obj-base.dev; const struct intel_renderstate_rodata *rodata = so-rodata; unsigned int i = 0, reloc_index = 0; struct page *page; @@ -125,6 +126,20 @@ static int render_state_setup(struct render_state *so) so-aux_batch_offset = i * sizeof(u32); + if (IS_BROXTON(dev)) { + u32 pool_config = 0; + struct drm_i915_private *dev_priv = to_i915(dev); + + OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE); + OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE); + if (dev_priv-info.subslice_total == 3) + pool_config = 0x00777000; + OUT_BATCH(d, i, pool_config); + OUT_BATCH(d, i, 0); + OUT_BATCH(d, i, 0); + OUT_BATCH(d, i, 0); + } + OUT_BATCH(d, i, MI_BATCH_BUFFER_END); so-aux_batch_size = (i * sizeof(u32)) - so-aux_batch_offset; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9a2ffad..e052499 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -379,6 +379,8 @@ */ #define GFX_INSTR(opcode, flags) ((0x3 29) | ((opcode) 24) | (flags)) +#define GEN9_MEDIA_POOL_STATE ((0x3 29) | (0x2 27) | (0x5 16) | 4) +#define GEN9_MEDIA_POOL_ENABLE (1 31) #define 
GFX_OP_RASTER_RULES((0x329)|(0x724)) #define GFX_OP_SCISSOR ((0x329)|(0x1c24)|(0x1019)) #define SC_UPDATE_SCISSOR (0x11) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v1 2/4] drm/i915: Add provision to extend Golden context batch
The Golden batch carries 3D state at the beginning so that HW starts with a known state. It is carried as a binary blob which is auto-generated from source. The idea was it would be easier to maintain and keep the complexity out of the kernel which makes sense as we don't really touch it. However if you really need to update it then you need to update generator source and keep the binary blob in sync with it. There is a need to patch this in bxt to send one additional command to enable a feature. A solution was to patch the binary data with some additional data structures (included as part of auto-generator source) but it was unnecessarily complicated. Chris suggested the idea of having a secondary batch and execute two batch buffers. It has clear advantages as we needn't touch the base golden batch, can customize secondary/auxiliary batch depending on Gen and can be carried in the driver with no dependencies. This patch adds support for this auxiliary batch which is inserted at the end of golden batch and is completely independent from it. Thanks to Mika for the preliminary review. 
Cc: Mika Kuoppala mika.kuopp...@intel.com Cc: Chris Wilson ch...@chris-wilson.co.uk Cc: Armin Reese armin.c.re...@intel.com Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_render_state.c | 27 +++ drivers/gpu/drm/i915/i915_gem_render_state.h | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 6 ++ 3 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index b6492fe..b86e382 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -73,6 +73,15 @@ free_gem: return ret; } +#define OUT_BATCH(batch, i, val) \ + do {\ + if (WARN_ON((i) = PAGE_SIZE / sizeof(u32))) { \ + ret = -ENOSPC; \ + goto err_out; \ + } \ + (batch)[(i)++] = (val); \ + } while(0) + static int render_state_setup(struct render_state *so) { const struct intel_renderstate_rodata *rodata = so-rodata; @@ -110,6 +119,15 @@ static int render_state_setup(struct render_state *so) d[i++] = s; } + + while (i % CACHELINE_DWORDS) + OUT_BATCH(d, i, MI_NOOP); + + so-aux_batch_offset = i * sizeof(u32); + + OUT_BATCH(d, i, MI_BATCH_BUFFER_END); + so-aux_batch_size = (i * sizeof(u32)) - so-aux_batch_offset; + kunmap(page); ret = i915_gem_object_set_to_gtt_domain(so-obj, false); @@ -128,6 +146,8 @@ err_out: return ret; } +#undef OUT_BATCH + void i915_gem_render_state_fini(struct render_state *so) { i915_gem_object_ggtt_unpin(so-obj); @@ -176,6 +196,13 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) if (ret) goto out; + ret = req-ring-dispatch_execbuffer(req, +(so.ggtt_offset + so.aux_batch_offset), +so.aux_batch_size, +I915_DISPATCH_SECURE); + if (ret) + goto out; + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); out: diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 7aa7372..79de101 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ 
b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -37,6 +37,8 @@ struct render_state { struct drm_i915_gem_object *obj; u64 ggtt_offset; int gen; + u32 aux_batch_size; + u64 aux_batch_offset; }; int i915_gem_render_state_init(struct drm_i915_gem_request *req); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index adb386d..5e4771e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1661,6 +1661,12 @@ static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req) if (ret) goto out; + ret = req-ring-emit_bb_start(req, + (so.ggtt_offset + so.aux_batch_offset), + I915_DISPATCH_SECURE); + if (ret) + goto out; + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); out: -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v1 2/4] drm/i915: Add provision to extend Golden context batch
On Fri, Jul 17, 2015 at 05:08:52PM +0100, Arun Siluvery wrote: The Golden batch carries 3D state at the beginning so that HW starts with a known state. It is carried as a binary blob which is auto-generated from source. The idea was it would be easier to maintain and keep the complexity out of the kernel which makes sense as we don't really touch it. However if you really need to update it then you need to update generator source and keep the binary blob in sync with it. There is a need to patch this in bxt to send one additional command to enable a feature. A solution was to patch the binary data with some additional data structures (included as part of auto-generator source) but it was unnecessarily complicated. Chris suggested the idea of having a secondary batch and execute two batch buffers. It has clear advantages as we needn't touch the base golden batch, can customize secondary/auxiliary batch depending on Gen and can be carried in the driver with no dependencies. This patch adds support for this auxiliary batch which is inserted at the end of golden batch and is completely independent from it. Thanks to Mika for the preliminary review. 
Cc: Mika Kuoppala mika.kuopp...@intel.com Cc: Chris Wilson ch...@chris-wilson.co.uk Cc: Armin Reese armin.c.re...@intel.com Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_render_state.c | 27 +++ drivers/gpu/drm/i915/i915_gem_render_state.h | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 6 ++ 3 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index b6492fe..b86e382 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -73,6 +73,15 @@ free_gem: return ret; } +#define OUT_BATCH(batch, i, val) \ + do {\ + if (WARN_ON((i) = PAGE_SIZE / sizeof(u32))) { \ We have to be slightly more careful here, as we don't have the full page available since we put render state into the high arena of the golden bb. Something like WARN_ON(i PAGE/sizeof(u32) || (batch)[i]) should suffice. @@ -110,6 +119,15 @@ static int render_state_setup(struct render_state *so) d[i++] = s; } + + while (i % CACHELINE_DWORDS) + OUT_BATCH(d, i, MI_NOOP); + + so-aux_batch_offset = i * sizeof(u32); + + OUT_BATCH(d, i, MI_BATCH_BUFFER_END); + so-aux_batch_size = (i * sizeof(u32)) - so-aux_batch_offset; Strictly, and if we are passing the batch length we are being strictly conformant, then the aux_batch_size must be a multiple of 8. 
+ kunmap(page); ret = i915_gem_object_set_to_gtt_domain(so-obj, false); @@ -128,6 +146,8 @@ err_out: return ret; } +#undef OUT_BATCH + void i915_gem_render_state_fini(struct render_state *so) { i915_gem_object_ggtt_unpin(so-obj); @@ -176,6 +196,13 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) if (ret) goto out; Then we need only execute this BB if so.aux_batch_size 8 + ret = req-ring-dispatch_execbuffer(req, + (so.ggtt_offset + so.aux_batch_offset), + so.aux_batch_size, + I915_DISPATCH_SECURE); + if (ret) + goto out; + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v1 3/4] drm/i915:bxt: Enable Pooled EU support
On Fri, Jul 17, 2015 at 05:08:53PM +0100, Arun Siluvery wrote: This mode allows to assign EUs to pools. The command to enable this mode is sent in auxiliary golden context batch as this is only issued once with each context initialization. Thanks to Mika for the preliminary review. A quick explanation for why this has to be in the kernel would be nice. Privileged instruction? Not fond of the split between this and patch 4. Patch 4 introduces one feature flag that looks different to the one we use here to enable support. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH i-g-t 1/3] lib: Move gpgpu_fill code to separate file
The gpgpu fill utility functions are used in separate test so it's logical to keep them in separate file. This is similar to what media spin test did in the past. Functionally only gpgpu kernel changed. Send instruction payload size was reduced. Since offset is incremented by 0x10 bytes there is no point in using larger writes. Cc: Thomas Wood thomas.w...@intel.com Signed-off-by: Dominik Zeromski dominik.zerom...@intel.com --- lib/Makefile.sources | 4 +- lib/gpgpu_fill.c | 422 +++ lib/gpgpu_fill.h | 37 lib/intel_batchbuffer.c | 1 + lib/media_fill.h | 7 - lib/media_fill_gen7.c| 151 shaders/gpgpu/gpgpu_fill.gxa | 14 +- 7 files changed, 464 insertions(+), 172 deletions(-) create mode 100755 lib/gpgpu_fill.c create mode 100644 lib/gpgpu_fill.h diff --git a/lib/Makefile.sources b/lib/Makefile.sources index 7f88b65..695f609 100644 --- a/lib/Makefile.sources +++ b/lib/Makefile.sources @@ -32,7 +32,9 @@ libintel_tools_la_SOURCES = \ media_fill_gen8lp.c \ media_fill_gen9.c \ media_spin.h\ - media_spin.c\ + media_spin.c\ + gpgpu_fill.h\ + gpgpu_fill.c\ gen7_media.h\ gen8_media.h\ rendercopy_i915.c \ diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c new file mode 100755 index 000..f0911e6 --- /dev/null +++ b/lib/gpgpu_fill.c @@ -0,0 +1,422 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Zhenyu Wang zhen...@linux.intel.com + * Dominik Zeromski dominik.zerom...@intel.com + */ + +#include intel_bufmgr.h +#include i915_drm.h + +#include intel_reg.h +#include drmtest.h +#include intel_batchbuffer.h +#include gen7_media.h +#include gpgpu_fill.h + +/* shaders/gpgpu/gpgpu_fill.gxa */ +static const uint32_t gen7_gpgpu_kernel[][4] = { + { 0x0041, 0x20200231, 0x0020, 0x }, + { 0x0041, 0x20400c21, 0x0004, 0x0010 }, + { 0x0001, 0x20440021, 0x0018, 0x }, + { 0x0061, 0x20800021, 0x008d, 0x }, + { 0x0021, 0x20800021, 0x00450040, 0x }, + { 0x0001, 0x20880061, 0x, 0x000f }, + { 0x0081, 0x20a00021, 0x0020, 0x }, + { 0x05800031, 0x24001ca8, 0x0080, 0x060a8000 }, + { 0x0061, 0x2e21, 0x008d, 0x }, + { 0x07800031, 0x20001ca8, 0x0e00, 0x8210 }, +}; + +static uint32_t +batch_used(struct intel_batchbuffer *batch) +{ + return batch-ptr - batch-buffer; +} + +static uint32_t +batch_align(struct intel_batchbuffer *batch, uint32_t align) +{ + uint32_t offset = batch_used(batch); + offset = ALIGN(offset, align); + batch-ptr = batch-buffer + offset; + return offset; +} + +static void * +batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align) +{ + uint32_t offset = batch_align(batch, align); + batch-ptr += size; + return memset(batch-buffer + offset, 0, size); +} + +static uint32_t +batch_offset(struct intel_batchbuffer *batch, void *ptr) +{ + return (uint8_t *)ptr - batch-buffer; +} + +static uint32_t +batch_copy(struct intel_batchbuffer *batch, const void 
*ptr, uint32_t size, + uint32_t align) +{ + return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size)); +} + +static void +gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end) +{ + int ret; + + ret = drm_intel_bo_subdata(batch-bo, 0, 4096, batch-buffer); + if (ret == 0) + ret = drm_intel_bo_mrb_exec(batch-bo, batch_end, + NULL, 0, 0, 0); + igt_assert(ret == 0); +} + +static
[Intel-gfx] [PATCH i-g-t 0/3] gpgpu_fill test new hardware support
These patches rearrange the gpgpu fill library functions and add support for BDW and SKL. Dominik Zeromski (3): lib: Move gpgpu_fill code to separate file lib/gpgpu_fill: Add BDW support lib/gpgpu_fill: Add SKL support lib/Makefile.sources | 4 +- lib/gpgpu_fill.c | 808 +++ lib/gpgpu_fill.h | 51 +++ lib/intel_batchbuffer.c | 5 + lib/media_fill.h | 7 - lib/media_fill_gen7.c| 151 shaders/gpgpu/gpgpu_fill.gxa | 14 +- 7 files changed, 868 insertions(+), 172 deletions(-) create mode 100755 lib/gpgpu_fill.c create mode 100644 lib/gpgpu_fill.h -- 1.8.3.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH i-g-t 2/3] lib/gpgpu_fill: Add BDW support
BDW changed structure of surface state and interface descriptors. Commands like state base address, gpgpu walker were extended. Cc: Thomas Wood thomas.w...@intel.com Signed-off-by: Dominik Zeromski dominik.zerom...@intel.com --- lib/gpgpu_fill.c| 296 +++- lib/gpgpu_fill.h| 7 ++ lib/intel_batchbuffer.c | 2 + 3 files changed, 299 insertions(+), 6 deletions(-) diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c index f0911e6..c98f121 100755 --- a/lib/gpgpu_fill.c +++ b/lib/gpgpu_fill.c @@ -32,6 +32,7 @@ #include drmtest.h #include intel_batchbuffer.h #include gen7_media.h +#include gen8_media.h #include gpgpu_fill.h /* shaders/gpgpu/gpgpu_fill.gxa */ @@ -48,6 +49,19 @@ static const uint32_t gen7_gpgpu_kernel[][4] = { { 0x07800031, 0x20001ca8, 0x0e00, 0x8210 }, }; +static const uint32_t gen8_gpgpu_kernel[][4] = { + { 0x0041, 0x20202288, 0x0020, 0x }, + { 0x0041, 0x20400208, 0x0604, 0x0010 }, + { 0x0001, 0x20440208, 0x0018, 0x }, + { 0x0061, 0x20800208, 0x008d, 0x }, + { 0x0021, 0x20800208, 0x00450040, 0x }, + { 0x0001, 0x20880608, 0x, 0x000f }, + { 0x0081, 0x20a00208, 0x0020, 0x }, + { 0x0c800031, 0x24000a40, 0x0e80, 0x060a8000 }, + { 0x0061, 0x2e000208, 0x008d, 0x }, + { 0x07800031, 0x2a40, 0x0e000e00, 0x8210 }, +}; + static uint32_t batch_used(struct intel_batchbuffer *batch) { @@ -97,8 +111,7 @@ gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end) } static uint32_t -gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch, - uint8_t color) +gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch, uint8_t color) { uint8_t *curbe_buffer; uint32_t offset; @@ -160,6 +173,58 @@ gen7_fill_surface_state(struct intel_batchbuffer *batch, } static uint32_t +gen8_fill_surface_state(struct intel_batchbuffer *batch, + struct igt_buf *buf, + uint32_t format, + int is_dst) +{ + struct gen8_surface_state *ss; + uint32_t write_domain, read_domain, offset; + int ret; + + if (is_dst) { + write_domain = read_domain = I915_GEM_DOMAIN_RENDER; + } else { + 
write_domain = 0; + read_domain = I915_GEM_DOMAIN_SAMPLER; + } + + ss = batch_alloc(batch, sizeof(*ss), 64); + offset = batch_offset(batch, ss); + + ss-ss0.surface_type = GEN8_SURFACE_2D; + ss-ss0.surface_format = format; + ss-ss0.render_cache_read_write = 1; + ss-ss0.vertical_alignment = 1; /* align 4 */ + ss-ss0.horizontal_alignment = 1; /* align 4 */ + + if (buf-tiling == I915_TILING_X) + ss-ss0.tiled_mode = 2; + else if (buf-tiling == I915_TILING_Y) + ss-ss0.tiled_mode = 3; + + ss-ss8.base_addr = buf-bo-offset; + + ret = drm_intel_bo_emit_reloc(batch-bo, + batch_offset(batch, ss) + 8 * 4, + buf-bo, 0, + read_domain, write_domain); + igt_assert_eq(ret, 0); + + ss-ss2.height = igt_buf_height(buf) - 1; + ss-ss2.width = igt_buf_width(buf) - 1; + ss-ss3.pitch = buf-stride - 1; + + ss-ss7.shader_chanel_select_r = 4; + ss-ss7.shader_chanel_select_g = 5; + ss-ss7.shader_chanel_select_b = 6; + ss-ss7.shader_chanel_select_a = 7; + + return offset; + +} + +static uint32_t gen7_fill_binding_table(struct intel_batchbuffer *batch, struct igt_buf *dst) { @@ -174,6 +239,20 @@ gen7_fill_binding_table(struct intel_batchbuffer *batch, } static uint32_t +gen8_fill_binding_table(struct intel_batchbuffer *batch, + struct igt_buf *dst) +{ + uint32_t *binding_table, offset; + + binding_table = batch_alloc(batch, 32, 64); + offset = batch_offset(batch, binding_table); + + binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1); + + return offset; +} + +static uint32_t gen7_fill_gpgpu_kernel(struct intel_batchbuffer *batch, const uint32_t kernel[][4], size_t size) @@ -216,6 +295,37 @@ gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf * return offset; } +static uint32_t +gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst, + const uint32_t kernel[][4], size_t size) +{ + struct gen8_interface_descriptor_data *idd; + uint32_t offset; + uint32_t binding_table_offset, kernel_offset; + + 
binding_table_offset = gen8_fill_binding_table(batch, dst); + kernel_offset = gen7_fill_gpgpu_kernel(batch,
[Intel-gfx] [PATCH i-g-t 3/3] lib/gpgpu_fill: Add SKL support
SKL changed state base address command. Cc: Thomas Wood thomas.w...@intel.com Signed-off-by: Dominik Zeromski dominik.zerom...@intel.com --- lib/gpgpu_fill.c| 102 lib/gpgpu_fill.h| 7 lib/intel_batchbuffer.c | 2 + 3 files changed, 111 insertions(+) diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c index c98f121..4d98643 100755 --- a/lib/gpgpu_fill.c +++ b/lib/gpgpu_fill.c @@ -62,6 +62,19 @@ static const uint32_t gen8_gpgpu_kernel[][4] = { { 0x07800031, 0x2a40, 0x0e000e00, 0x8210 }, }; +static const uint32_t gen9_gpgpu_kernel[][4] = { + { 0x0041, 0x20202288, 0x0020, 0x }, + { 0x0041, 0x20400208, 0x0604, 0x0010 }, + { 0x0001, 0x20440208, 0x0018, 0x }, + { 0x0061, 0x20800208, 0x008d, 0x }, + { 0x0021, 0x20800208, 0x00450040, 0x }, + { 0x0001, 0x20880608, 0x, 0x000f }, + { 0x0081, 0x20a00208, 0x0020, 0x }, + { 0x0c800031, 0x24000a40, 0x0680, 0x060a8000 }, + { 0x0061, 0x2e000208, 0x008d, 0x }, + { 0x07800031, 0x2a40, 0x06000e00, 0x8210 }, +}; + static uint32_t batch_used(struct intel_batchbuffer *batch) { @@ -390,6 +403,47 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch) } static void +gen9_emit_state_base_address(struct intel_batchbuffer *batch) +{ + OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2)); + + /* general */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(0); + + /* stateless data port */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); + + /* surface */ + OUT_RELOC(batch-bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY); + + /* dynamic */ + OUT_RELOC(batch-bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, + 0, BASE_ADDRESS_MODIFY); + + /* indirect */ + OUT_BATCH(0); + OUT_BATCH(0); + + /* instruction */ + OUT_RELOC(batch-bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); + + /* general state buffer size */ + OUT_BATCH(0xf000 | 1); + /* dynamic state buffer size */ + OUT_BATCH(1 12 | 1); + /* indirect object buffer size */ + OUT_BATCH(0xf000 | 1); + /* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang 
*/ + OUT_BATCH(1 12 | 1); + + /* Bindless surface state base address */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); + OUT_BATCH(0); + OUT_BATCH(0xf000); +} + +static void gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch) { OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2)); @@ -704,3 +758,51 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch, gen7_render_flush(batch, batch_end); intel_batchbuffer_reset(batch); } + +void +gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch, + struct igt_buf *dst, + unsigned x, unsigned y, + unsigned width, unsigned height, + uint8_t color) +{ + uint32_t curbe_buffer, interface_descriptor; + uint32_t batch_end; + + intel_batchbuffer_flush(batch); + + /* setup states */ + batch-ptr = batch-buffer[BATCH_STATE_SPLIT]; + + /* +* const buffer needs to fill for every thread, but as we have just 1 thread +* per every group, so need only one curbe data. +* +* For each thread, just use thread group ID for buffer offset. +*/ + curbe_buffer = gen7_fill_curbe_buffer_data(batch, color); + + interface_descriptor = gen8_fill_interface_descriptor(batch, dst, + gen9_gpgpu_kernel, + sizeof(gen9_gpgpu_kernel)); + igt_assert(batch-ptr batch-buffer[4095]); + + batch-ptr = batch-buffer; + + /* GPGPU pipeline */ + OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU); + + gen9_emit_state_base_address(batch); + gen8_emit_vfe_state_gpgpu(batch); + gen7_emit_curbe_load(batch, curbe_buffer); + gen7_emit_interface_descriptor_load(batch, interface_descriptor); + gen8_emit_gpgpu_walk(batch, x, y, width, height); + + OUT_BATCH(MI_BATCH_BUFFER_END); + + batch_end = batch_align(batch, 8); + igt_assert(batch_end BATCH_STATE_SPLIT); + + gen7_render_flush(batch, batch_end); + intel_batchbuffer_reset(batch); +} diff --git a/lib/gpgpu_fill.h b/lib/gpgpu_fill.h index 2d14881..7b5c832 100644 --- a/lib/gpgpu_fill.h +++ b/lib/gpgpu_fill.h @@ -41,4 +41,11 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch, unsigned width, unsigned height, uint8_t color); +void 
+gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch, + struct
Re: [Intel-gfx] [PATCH v1 2/4] drm/i915: Add provision to extend Golden context batch
On Fri, Jul 17, 2015 at 07:37:45PM +0300, Mika Kuoppala wrote: Chris Wilson ch...@chris-wilson.co.uk writes: On Fri, Jul 17, 2015 at 05:08:52PM +0100, Arun Siluvery wrote: The Golden batch carries 3D state at the beginning so that HW starts with a known state. It is carried as a binary blob which is auto-generated from source. The idea was it would be easier to maintain and keep the complexity out of the kernel which makes sense as we don't really touch it. However if you really need to update it then you need to update generator source and keep the binary blob in sync with it. There is a need to patch this in bxt to send one additional command to enable a feature. A solution was to patch the binary data with some additional data structures (included as part of auto-generator source) but it was unnecessarily complicated. Chris suggested the idea of having a secondary batch and execute two batch buffers. It has clear advantages as we needn't touch the base golden batch, can customize secondary/auxiliary batch depending on Gen and can be carried in the driver with no dependencies. This patch adds support for this auxiliary batch which is inserted at the end of golden batch and is completely independent from it. Thanks to Mika for the preliminary review. 
Cc: Mika Kuoppala mika.kuopp...@intel.com Cc: Chris Wilson ch...@chris-wilson.co.uk Cc: Armin Reese armin.c.re...@intel.com Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_render_state.c | 27 +++ drivers/gpu/drm/i915/i915_gem_render_state.h | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 6 ++ 3 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index b6492fe..b86e382 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -73,6 +73,15 @@ free_gem: return ret; } +#define OUT_BATCH(batch, i, val) \ + do {\ + if (WARN_ON((i) = PAGE_SIZE / sizeof(u32))) { \ We have to be slightly more careful here, as we don't have the full page available since we put render state into the high arena of the golden bb. Something like WARN_ON(i PAGE/sizeof(u32) || (batch)[i]) should suffice. Null state gen makes the final batch with two passes. First it builds command and state separately. And when size of both are know, it compacts by relocating the state right after the commands (+some alignment). So we should have the rest of the page usable for auxillary commands here as we have already copied the state part also. Ta. Maybe add some words of enlightenment here for future me as well? Also we will need to document that the kernel then relies on the packing to add extra commands after the batch to the null state generator. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v1 3/4] drm/i915:bxt: Enable Pooled EU support
On Fri, Jul 17, 2015 at 05:54:20PM +0100, Siluvery, Arun wrote: On 17/07/2015 17:27, Chris Wilson wrote: On Fri, Jul 17, 2015 at 05:08:53PM +0100, Arun Siluvery wrote: This mode allows to assign EUs to pools. The command to enable this mode is sent in auxiliary golden context batch as this is only issued once with each context initialization. Thanks to Mika for the preliminary review. A quick explanation for why this has to be in the kernel would be nice. Privileged instruction? The purpose of the auxiliary batch is explained in patch2, but I can add some explanation about this one also. Here, I am looking for an explanation of why these commands in particular are desired. Mika's short explanation -- that it must be the same for all contexts on the system -- is sufficient. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v1 2/4] drm/i915: Add provision to extend Golden context batch
Chris Wilson ch...@chris-wilson.co.uk writes: On Fri, Jul 17, 2015 at 05:08:52PM +0100, Arun Siluvery wrote: The Golden batch carries 3D state at the beginning so that HW starts with a known state. It is carried as a binary blob which is auto-generated from source. The idea was it would be easier to maintain and keep the complexity out of the kernel which makes sense as we don't really touch it. However if you really need to update it then you need to update generator source and keep the binary blob in sync with it. There is a need to patch this in bxt to send one additional command to enable a feature. A solution was to patch the binary data with some additional data structures (included as part of auto-generator source) but it was unnecessarily complicated. Chris suggested the idea of having a secondary batch and execute two batch buffers. It has clear advantages as we needn't touch the base golden batch, can customize secondary/auxiliary batch depending on Gen and can be carried in the driver with no dependencies. This patch adds support for this auxiliary batch which is inserted at the end of golden batch and is completely independent from it. Thanks to Mika for the preliminary review. 
Cc: Mika Kuoppala mika.kuopp...@intel.com Cc: Chris Wilson ch...@chris-wilson.co.uk Cc: Armin Reese armin.c.re...@intel.com Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_render_state.c | 27 +++ drivers/gpu/drm/i915/i915_gem_render_state.h | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 6 ++ 3 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index b6492fe..b86e382 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -73,6 +73,15 @@ free_gem: return ret; } +#define OUT_BATCH(batch, i, val)\ +do {\ +if (WARN_ON((i) = PAGE_SIZE / sizeof(u32))) { \ We have to be slightly more careful here, as we don't have the full page available since we put render state into the high arena of the golden bb. Something like WARN_ON(i PAGE/sizeof(u32) || (batch)[i]) should suffice. Null state gen makes the final batch with two passes. First it builds command and state separately. And when size of both are know, it compacts by relocating the state right after the commands (+some alignment). So we should have the rest of the page usable for auxillary commands here as we have already copied the state part also. -Mika @@ -110,6 +119,15 @@ static int render_state_setup(struct render_state *so) d[i++] = s; } + +while (i % CACHELINE_DWORDS) +OUT_BATCH(d, i, MI_NOOP); + +so-aux_batch_offset = i * sizeof(u32); + +OUT_BATCH(d, i, MI_BATCH_BUFFER_END); +so-aux_batch_size = (i * sizeof(u32)) - so-aux_batch_offset; Strictly, and if we are passing the batch length we are being strictly conformant, then the aux_batch_size must be a multiple of 8. 
+ kunmap(page); ret = i915_gem_object_set_to_gtt_domain(so-obj, false); @@ -128,6 +146,8 @@ err_out: return ret; } +#undef OUT_BATCH + void i915_gem_render_state_fini(struct render_state *so) { i915_gem_object_ggtt_unpin(so-obj); @@ -176,6 +196,13 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) if (ret) goto out; Then we need only execute this BB if so.aux_batch_size 8 +ret = req-ring-dispatch_execbuffer(req, + (so.ggtt_offset + so.aux_batch_offset), + so.aux_batch_size, + I915_DISPATCH_SECURE); +if (ret) +goto out; + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v1 3/4] drm/i915:bxt: Enable Pooled EU support
Chris Wilson ch...@chris-wilson.co.uk writes: On Fri, Jul 17, 2015 at 05:08:53PM +0100, Arun Siluvery wrote: This mode allows to assign EUs to pools. The command to enable this mode is sent in auxiliary golden context batch as this is only issued once with each context initialization. Thanks to Mika for the preliminary review. A quick explanation for why this has to be in the kernel would be nice. Privileged instruction? The pooled mode is global. Once set, it has to stay the same across all contexts until subsequent fw reset. -Mika Not fond of the split between this and patch 4. Patch 4 introduces one feature flag that looks different to the one we use here to enable support. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v1 3/4] drm/i915:bxt: Enable Pooled EU support
On 17/07/2015 17:27, Chris Wilson wrote: On Fri, Jul 17, 2015 at 05:08:53PM +0100, Arun Siluvery wrote: This mode allows to assign EUs to pools. The command to enable this mode is sent in auxiliary golden context batch as this is only issued once with each context initialization. Thanks to Mika for the preliminary review. A quick explanation for why this has to be in the kernel would be nice. Privileged instruction? The purpose of the auxiliary batch is explained in patch2, but I can add some explanation about this one also. Not fond of the split between this and patch 4. Patch 4 introduces one feature flag that looks different to the one we use here to enable support. I will keep patch4 separate as it deals with libdrm changes but use the feature flag in this one. regards Arun -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 3/3] drm/i915:bxt: Enable Pooled EU support
This mode allows to assign EUs to pools which can process work collectively. The command to enable this mode should be issued as part of context initialization. The pooled mode is global; once enabled it has to stay the same across all contexts until HW reset, hence this is sent in the auxiliary golden context batch. Thanks to Mika for the preliminary review and comments. v2: explain why this is enabled in golden context, use feature flag while enabling the support (Chris) v3: Pooled EU support is announced to userspace before enabling in the kernel; to simplify, include all changes in the same patch. User space clients need to know when the pooled EU feature is present and enabled on the hardware so that they can adapt work submissions. Create a new device info flag for this purpose, and create a new GETPARAM entry to allow user space to query its setting. Set has_pooled_eu to true in the Broxton static device info - Broxton supports the feature in hardware and the driver will enable it by default. 
Cc: Mika Kuoppala mika.kuopp...@intel.com Cc: Chris Wilson ch...@chris-wilson.co.uk Cc: Armin Reese armin.c.re...@intel.com Signed-off-by: Jeff McGee jeff.mc...@intel.com Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 5 - drivers/gpu/drm/i915/i915_gem_render_state.c | 13 + drivers/gpu/drm/i915/i915_reg.h | 2 ++ include/uapi/drm/i915_drm.h | 1 + 6 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 5e63076..6c31beb 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_RESOURCE_STREAMER: value = HAS_RESOURCE_STREAMER(dev); break; + case I915_PARAM_HAS_POOLED_EU: + value = HAS_POOLED_EU(dev); + break; default: DRM_DEBUG(Unknown parameter %d\n, param-param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index e44dc0d..213f74d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -389,6 +389,7 @@ static const struct intel_device_info intel_broxton_info = { .num_pipes = 3, .has_ddi = 1, .has_fbc = 1, + .has_pooled_eu = 1, GEN_DEFAULT_PIPEOFFSETS, IVB_CURSOR_OFFSETS, }; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 768d1db..32850a8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -775,7 +775,8 @@ struct intel_csr { func(supports_tv) sep \ func(has_llc) sep \ func(has_ddi) sep \ - func(has_fpga_dbg) + func(has_fpga_dbg) sep \ + func(has_pooled_eu) #define DEFINE_FLAG(name) u8 name:1 #define SEP_SEMICOLON ; @@ -2549,6 +2550,8 @@ struct drm_i915_cmd_table { #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \ INTEL_INFO(dev)-gen = 8) +#define HAS_POOLED_EU(dev) 
(INTEL_INFO(dev)-has_pooled_eu) + #define INTEL_PCH_DEVICE_ID_MASK 0xff00 #define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 #define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00 diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 5026a62..8866040 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -93,6 +93,7 @@ free_gem: static int render_state_setup(struct render_state *so) { + struct drm_device *dev = so-obj-base.dev; const struct intel_renderstate_rodata *rodata = so-rodata; unsigned int i = 0, reloc_index = 0; struct page *page; @@ -134,6 +135,18 @@ static int render_state_setup(struct render_state *so) so-aux_batch_offset = i * sizeof(u32); + if (HAS_POOLED_EU(dev)) { + u32 pool_config = (INTEL_INFO(dev)-subslice_total == 3 ? + 0x00777000 : 0); + + OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE); + OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE); + OUT_BATCH(d, i, pool_config); + OUT_BATCH(d, i, 0); + OUT_BATCH(d, i, 0); + OUT_BATCH(d, i, 0); + } + OUT_BATCH(d, i, MI_BATCH_BUFFER_END); so-aux_batch_size = (i * sizeof(u32)) - so-aux_batch_offset; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9a2ffad..e052499 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -379,6 +379,8 @@ */ #define GFX_INSTR(opcode, flags) ((0x3 29) | ((opcode) 24)
Re: [Intel-gfx] [PATCH v2 3/4] drm/i915/bxt: Add get_param to query Pooled EU availability
On 17/07/2015 19:13, Arun Siluvery wrote: User space clients need to know when the pooled EU feature is present and enabled on the hardware so that they can adapt work submissions. Create a new device info flag for this purpose, and create a new GETPARAM entry to allow user space to query its setting. Set has_pooled_eu to true in the Broxton static device info - Broxton supports the feature in hardware and the driver will enable it by default. Signed-off-by: Jeff McGee jeff.mc...@intel.com Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com --- Please ignore this patch, this is squashed with Patch4 drm/i915:bxt: Enable Pooled EU support to keep all enabling changes in the same place otherwise we would've announced support to userspace before enabling it in kernel. regards Arun drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 5 - include/uapi/drm/i915_drm.h | 1 + 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 5e63076..6c31beb 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_RESOURCE_STREAMER: value = HAS_RESOURCE_STREAMER(dev); break; + case I915_PARAM_HAS_POOLED_EU: + value = HAS_POOLED_EU(dev); + break; default: DRM_DEBUG(Unknown parameter %d\n, param-param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index e44dc0d..213f74d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -389,6 +389,7 @@ static const struct intel_device_info intel_broxton_info = { .num_pipes = 3, .has_ddi = 1, .has_fbc = 1, + .has_pooled_eu = 1, GEN_DEFAULT_PIPEOFFSETS, IVB_CURSOR_OFFSETS, }; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 768d1db..32850a8 100644 --- 
a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -775,7 +775,8 @@ struct intel_csr { func(supports_tv) sep \ func(has_llc) sep \ func(has_ddi) sep \ - func(has_fpga_dbg) + func(has_fpga_dbg) sep \ + func(has_pooled_eu) #define DEFINE_FLAG(name) u8 name:1 #define SEP_SEMICOLON ; @@ -2549,6 +2550,8 @@ struct drm_i915_cmd_table { #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \ INTEL_INFO(dev)-gen = 8) +#define HAS_POOLED_EU(dev) (INTEL_INFO(dev)-has_pooled_eu) + #define INTEL_PCH_DEVICE_ID_MASK 0xff00 #define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 #define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00 diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e7c29f1..9649577 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_EU_TOTAL34 #define I915_PARAM_HAS_GPU_RESET 35 #define I915_PARAM_HAS_RESOURCE_STREAMER 36 +#define I915_PARAM_HAS_POOLED_EU 37 typedef struct drm_i915_getparam { int param; ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 3/4] drm/i915/bxt: Add get_param to query Pooled EU availability
User space clients need to know when the pooled EU feature is present and enabled on the hardware so that they can adapt work submissions. Create a new device info flag for this purpose, and create a new GETPARAM entry to allow user space to query its setting. Set has_pooled_eu to true in the Broxton static device info - Broxton supports the feature in hardware and the driver will enable it by default. Signed-off-by: Jeff McGee jeff.mc...@intel.com Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 5 - include/uapi/drm/i915_drm.h | 1 + 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 5e63076..6c31beb 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_RESOURCE_STREAMER: value = HAS_RESOURCE_STREAMER(dev); break; + case I915_PARAM_HAS_POOLED_EU: + value = HAS_POOLED_EU(dev); + break; default: DRM_DEBUG(Unknown parameter %d\n, param-param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index e44dc0d..213f74d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -389,6 +389,7 @@ static const struct intel_device_info intel_broxton_info = { .num_pipes = 3, .has_ddi = 1, .has_fbc = 1, + .has_pooled_eu = 1, GEN_DEFAULT_PIPEOFFSETS, IVB_CURSOR_OFFSETS, }; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 768d1db..32850a8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -775,7 +775,8 @@ struct intel_csr { func(supports_tv) sep \ func(has_llc) sep \ func(has_ddi) sep \ - func(has_fpga_dbg) + func(has_fpga_dbg) sep \ + func(has_pooled_eu) #define DEFINE_FLAG(name) u8 name:1 #define 
SEP_SEMICOLON ; @@ -2549,6 +2550,8 @@ struct drm_i915_cmd_table { #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \ INTEL_INFO(dev)-gen = 8) +#define HAS_POOLED_EU(dev) (INTEL_INFO(dev)-has_pooled_eu) + #define INTEL_PCH_DEVICE_ID_MASK 0xff00 #define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 #define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00 diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e7c29f1..9649577 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_EU_TOTAL 34 #define I915_PARAM_HAS_GPU_RESET35 #define I915_PARAM_HAS_RESOURCE_STREAMER 36 +#define I915_PARAM_HAS_POOLED_EU 37 typedef struct drm_i915_getparam { int param; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 2/4] drm/i915: Add provision to extend Golden context batch
The Golden batch carries 3D state at the beginning so that HW starts with a known state. It is carried as a binary blob which is auto-generated from source. The idea was it would be easier to maintain and keep the complexity out of the kernel which makes sense as we don't really touch it. However if you really need to update it then you need to update generator source and keep the binary blob in sync with it. There is a need to patch this in bxt to send one additional command to enable a feature. A solution was to patch the binary data with some additional data structures (included as part of auto-generator source) but it was unnecessarily complicated. Chris suggested the idea of having a secondary batch and execute two batch buffers. It has clear advantages as we needn't touch the base golden batch, can customize secondary/auxiliary batch depending on Gen and can be carried in the driver with no dependencies. This patch adds support for this auxiliary batch which is inserted at the end of golden batch and is completely independent from it. Thanks to Mika for the preliminary review. v2: Strictly conform to the batch size requirements to cover Gen2 and add comments to clarify overflow check in macro (Chris, Mika). Cc: Mika Kuoppala mika.kuopp...@intel.com Cc: Chris Wilson ch...@chris-wilson.co.uk Cc: Armin Reese armin.c.re...@intel.com Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_render_state.c | 45 drivers/gpu/drm/i915/i915_gem_render_state.h | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 6 3 files changed, 53 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index b6492fe..5026a62 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -73,6 +73,24 @@ free_gem: return ret; } +/* + * Macro to add commands to auxiliary batch. 
+ * This macro only checks for page overflow before inserting the commands, + * this is sufficient as the null state generator makes the final batch + * with two passes to build command and state separately. At this point + * the size of both are known and it compacts them by relocating the state + * right after the commands taking care of alignment so we should have sufficient + * space below them for adding new commands. + */ +#define OUT_BATCH(batch, i, val) \ + do {\ + if (WARN_ON((i) = PAGE_SIZE / sizeof(u32))) { \ + ret = -ENOSPC; \ + goto err_out; \ + } \ + (batch)[(i)++] = (val); \ + } while(0) + static int render_state_setup(struct render_state *so) { const struct intel_renderstate_rodata *rodata = so-rodata; unsigned int i = 0, reloc_index = 0; struct page *page; @@ -110,6 +128,21 @@ static int render_state_setup(struct render_state *so) d[i++] = s; } + + while (i % CACHELINE_DWORDS) + OUT_BATCH(d, i, MI_NOOP); + + so-aux_batch_offset = i * sizeof(u32); + + OUT_BATCH(d, i, MI_BATCH_BUFFER_END); + so-aux_batch_size = (i * sizeof(u32)) - so-aux_batch_offset; + + /* +* Since we are sending length, we need to strictly conform to +* all requirements. For Gen2 this must be a multiple of 8. 
+*/ + so-aux_batch_size = ALIGN(so-aux_batch_size, 8); + kunmap(page); ret = i915_gem_object_set_to_gtt_domain(so-obj, false); @@ -128,6 +161,8 @@ err_out: return ret; } +#undef OUT_BATCH + void i915_gem_render_state_fini(struct render_state *so) { i915_gem_object_ggtt_unpin(so-obj); @@ -176,6 +211,16 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) if (ret) goto out; + if (so.aux_batch_size 8) { + ret = req-ring-dispatch_execbuffer(req, +(so.ggtt_offset + + so.aux_batch_offset), +so.aux_batch_size, +I915_DISPATCH_SECURE); + if (ret) + goto out; + } + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); out: diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 7aa7372..79de101 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -37,6 +37,8 @@ struct render_state { struct drm_i915_gem_object *obj; u64 ggtt_offset; int gen; + u32 aux_batch_size; + u64 aux_batch_offset; }; int
[Intel-gfx] [PATCH v2 0/4] Add Pooled EU support to BXT
v1: http://lists.freedesktop.org/archives/intel-gfx/2015-July/071951.html v2: auxiliary batch size must be a multiple of 8, use feature flag while enabling support and add comments to clarify various things. Resending all patches as the order is changed. Arun Siluvery (3): drm/i915: Add provision to extend Golden context batch drm/i915/bxt: Add get_param to query Pooled EU availability drm/i915:bxt: Enable Pooled EU support Mika Kuoppala (1): drm/i915: Do kunmap if renderstate parsing fails drivers/gpu/drm/i915/i915_dma.c | 3 ++ drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 5 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 70 +++- drivers/gpu/drm/i915/i915_gem_render_state.h | 2 + drivers/gpu/drm/i915/i915_reg.h | 2 + drivers/gpu/drm/i915/intel_lrc.c | 6 +++ include/uapi/drm/i915_drm.h | 1 + 8 files changed, 87 insertions(+), 3 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 4/4] drm/i915:bxt: Enable Pooled EU support
This mode allows to assign EUs to pools which can process work collectively. The command to enable this mode should be issued as part of context initialization. The pooled mode is global, once enabled it has to stay the same across all contexts until HW reset hence this is sent in auxiliary golden context batch. Thanks to Mika for the preliminary review and comments. v2: explain why this is enabled in golden context, use feature flag while enabling the support (Chris) Cc: Mika Kuoppala mika.kuopp...@intel.com Cc: Chris Wilson ch...@chris-wilson.co.uk Cc: Armin Reese armin.c.re...@intel.com Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_render_state.c | 15 +++ drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 5026a62..e4ff342 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -93,6 +93,7 @@ free_gem: static int render_state_setup(struct render_state *so) { + struct drm_device *dev = so-obj-base.dev; const struct intel_renderstate_rodata *rodata = so-rodata; unsigned int i = 0, reloc_index = 0; struct page *page; @@ -134,6 +135,20 @@ static int render_state_setup(struct render_state *so) so-aux_batch_offset = i * sizeof(u32); + if (HAS_POOLED_EU(dev)) { + u32 pool_config = 0; + struct drm_i915_private *dev_priv = to_i915(dev); + + OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE); + OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE); + if (dev_priv-info.subslice_total == 3) + pool_config = 0x00777000; + OUT_BATCH(d, i, pool_config); + OUT_BATCH(d, i, 0); + OUT_BATCH(d, i, 0); + OUT_BATCH(d, i, 0); + } + OUT_BATCH(d, i, MI_BATCH_BUFFER_END); so-aux_batch_size = (i * sizeof(u32)) - so-aux_batch_offset; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9a2ffad..e052499 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ 
b/drivers/gpu/drm/i915/i915_reg.h @@ -379,6 +379,8 @@ */ #define GFX_INSTR(opcode, flags) ((0x3 29) | ((opcode) 24) | (flags)) +#define GEN9_MEDIA_POOL_STATE ((0x3 29) | (0x2 27) | (0x5 16) | 4) +#define GEN9_MEDIA_POOL_ENABLE (1 31) #define GFX_OP_RASTER_RULES((0x329)|(0x724)) #define GFX_OP_SCISSOR ((0x329)|(0x1c24)|(0x1019)) #define SC_UPDATE_SCISSOR (0x11) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 1/4] drm/i915: Do kunmap if renderstate parsing fails
From: Mika Kuoppala mika.kuopp...@linux.intel.com Kunmap the renderstate page on error path. Reviewed-by: Arun Siluvery arun.siluv...@linux.intel.com Signed-off-by: Mika Kuoppala mika.kuopp...@intel.com --- drivers/gpu/drm/i915/i915_gem_render_state.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index a0201fc..b6492fe 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -96,8 +96,10 @@ static int render_state_setup(struct render_state *so) s = lower_32_bits(r); if (so-gen = 8) { if (i + 1 = rodata-batch_items || - rodata-batch[i + 1] != 0) - return -EINVAL; + rodata-batch[i + 1] != 0) { + ret = -EINVAL; + goto err_out; + } d[i++] = s; s = upper_32_bits(r); @@ -120,6 +122,10 @@ static int render_state_setup(struct render_state *so) } return 0; + +err_out: + kunmap(page); + return ret; } void i915_gem_render_state_fini(struct render_state *so) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2 4/4] drm/i915:bxt: Enable Pooled EU support
On Fri, Jul 17, 2015 at 07:13:34PM +0100, Arun Siluvery wrote: This mode allows to assign EUs to pools which can process work collectively. The command to enable this mode should be issued as part of context initialization. The pooled mode is global, once enabled it has to stay the same across all contexts until HW reset hence this is sent in auxiliary golden context batch. Thanks to Mika for the preliminary review and comments. v2: explain why this is enabled in golden context, use feature flag while enabling the support (Chris) You fell into the trap of telling userspace this was setup before we actually do so. Cc: Mika Kuoppala mika.kuopp...@intel.com Cc: Chris Wilson ch...@chris-wilson.co.uk Cc: Armin Reese armin.c.re...@intel.com Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_render_state.c | 15 +++ drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 5026a62..e4ff342 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -93,6 +93,7 @@ free_gem: static int render_state_setup(struct render_state *so) { + struct drm_device *dev = so-obj-base.dev; const struct intel_renderstate_rodata *rodata = so-rodata; unsigned int i = 0, reloc_index = 0; struct page *page; @@ -134,6 +135,20 @@ static int render_state_setup(struct render_state *so) so-aux_batch_offset = i * sizeof(u32); + if (HAS_POOLED_EU(dev)) { + u32 pool_config = 0; + struct drm_i915_private *dev_priv = to_i915(dev); Just a minor, as this would be neater as u32 pool_config = INTEL_INFO(dev)-subslice_total == 3 ? 0x00777000 : 0; At the very least keep both paths to set pool_config next to each other, e.g. u32 pool_config; ... 
pool_config = 0; if (INTEL_INFO(dev)-subslice_total == 3) pool_config = 0x00777000; Then we just have + OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE); + OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE); + OUT_BATCH(d, i, pool_config); + OUT_BATCH(d, i, 0); + OUT_BATCH(d, i, 0); + OUT_BATCH(d, i, 0); Which is much easier to read. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 04/13 v4] drm/i915: GuC-specific firmware loader
On Thu, Jul 09, 2015 at 07:29:05PM +0100, Dave Gordon wrote: From: Alex Dai yu@intel.com This fetches the required firmware image from the filesystem, then loads it into the GuC's memory via a dedicated DMA engine. This patch is derived from GuC loading work originally done by Vinit Azad and Ben Widawsky. v2: Various improvements per review comments by Chris Wilson v3: Removed 'wait' parameter to intel_guc_ucode_load() as firmware prefetch is no longer supported in the common firmware loader, per Daniel Vetter's request. Firmware checker callback fn now returns errno rather than bool. v4: Squash uC-independent code into GuC-specifc loader [Daniel Vetter] Don't keep the driver working (by falling back to execlist mode) if GuC firmware loading fails [Daniel Vetter] Issue: VIZ-4884 Signed-off-by: Alex Dai yu@intel.com Signed-off-by: Dave Gordon david.s.gor...@intel.com --- drivers/gpu/drm/i915/Makefile | 3 + drivers/gpu/drm/i915/i915_dma.c | 4 + drivers/gpu/drm/i915/i915_drv.h | 11 + drivers/gpu/drm/i915/i915_gem.c | 13 + drivers/gpu/drm/i915/i915_reg.h | 4 +- drivers/gpu/drm/i915/intel_guc.h| 67 drivers/gpu/drm/i915/intel_guc_loader.c | 536 7 files changed, 637 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/intel_guc.h create mode 100644 drivers/gpu/drm/i915/intel_guc_loader.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index de21965..e604cfe 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -39,6 +39,9 @@ i915-y += i915_cmd_parser.o \ intel_ringbuffer.o \ intel_uncore.o +# general-purpose microcontroller (GuC) support +i915-y += intel_guc_loader.o + # autogenerated null render state i915-y += intel_renderstate_gen6.o \ intel_renderstate_gen7.o \ diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 066c34c..958ab4f 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -472,6 +472,7 @@ static int 
i915_load_modeset_init(struct drm_device *dev) cleanup_gem: mutex_lock(dev-struct_mutex); + intel_guc_ucode_fini(dev); i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); mutex_unlock(dev-struct_mutex); @@ -869,6 +870,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) intel_uncore_init(dev); + intel_guc_ucode_init(dev); + /* Load CSR Firmware for SKL */ intel_csr_ucode_init(dev); @@ -1120,6 +1123,7 @@ int i915_driver_unload(struct drm_device *dev) flush_workqueue(dev_priv-wq); mutex_lock(dev-struct_mutex); + intel_guc_ucode_fini(dev); i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); mutex_unlock(dev-struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4a512da..15b9202 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -50,6 +50,7 @@ #include linux/intel-iommu.h #include linux/kref.h #include linux/pm_qos.h +#include intel_guc.h /* General customization: */ @@ -1694,6 +1695,8 @@ struct drm_i915_private { struct i915_virtual_gpu vgpu; + struct intel_guc guc; + struct intel_csr csr; /* Display CSR-related protection */ @@ -1938,6 +1941,11 @@ static inline struct drm_i915_private *dev_to_i915(struct device *dev) return to_i915(dev_get_drvdata(dev)); } +static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) +{ + return container_of(guc, struct drm_i915_private, guc); +} + /* Iterate over initialised rings */ #define for_each_ring(ring__, dev_priv__, i__) \ for ((i__) = 0; (i__) I915_NUM_RINGS; (i__)++) \ @@ -2543,6 +2551,9 @@ struct drm_i915_cmd_table { #define HAS_CSR(dev) (IS_SKYLAKE(dev)) +#define HAS_GUC_UCODE(dev) (IS_GEN9(dev)) +#define HAS_GUC_SCHED(dev) (IS_GEN9(dev)) + #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \ INTEL_INFO(dev)-gen = 8) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index dbbb649..e020309 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ 
b/drivers/gpu/drm/i915/i915_gem.c @@ -5074,6 +5074,19 @@ i915_gem_init_hw(struct drm_device *dev) goto out; } + /* We can't enable contexts until all firmware is loaded */ + ret = intel_guc_ucode_load(dev); + + /* + * If we got an error and GuC submission is enabled, map + * the error to -EIO so the GPU will be declared wedged. + * OTOH, if we didn't intend to use the GuC anyway, just + * discard the error and carry on. + */ + ret =
Re: [Intel-gfx] [PATCH 01/13 v4] drm/i915: Add i915_gem_object_create_from_data()
On Thu, Jul 09, 2015 at 07:29:02PM +0100, Dave Gordon wrote: i915_gem_object_create_from_data() is a generic function to save data from a plain linear buffer in a new pageable gem object that can later be accessed by the CPU and/or GPU. We will need this for the microcontroller firmware loading support code. Derived from i915_gem_object_write(), originally by Alex Dai v2: Change of function: now allocates fills a new object, rather than writing to an existing object New name courtesy of Chris Wilson Explicit domain-setting and other improvements per review comments by Chris Wilson Daniel Vetter v4: Rebased Issue: VIZ-4884 Signed-off-by: Alex Dai yu@intel.com Signed-off-by: Dave Gordon david.s.gor...@intel.com --- Reviewed-by: Tom O'Rourke Tom.O'rou...@intel.com drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 40 2 files changed, 42 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 464b28d..3c91507 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2755,6 +2755,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops); struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, size_t size); +struct drm_i915_gem_object *i915_gem_object_create_from_data( + struct drm_device *dev, const void *data, size_t size); void i915_init_vm(struct drm_i915_private *dev_priv, struct i915_address_space *vm); void i915_gem_free_object(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a0bff41..dbbb649 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5478,3 +5478,43 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) return false; } + +/* Allocate a new GEM object and fill it with the supplied data */ +struct drm_i915_gem_object * +i915_gem_object_create_from_data(struct drm_device *dev, + const void 
*data, size_t size) +{ + struct drm_i915_gem_object *obj; + struct sg_table *sg; + size_t bytes; + int ret; + + obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); + if (IS_ERR_OR_NULL(obj)) + return obj; + + ret = i915_gem_object_set_to_cpu_domain(obj, true); + if (ret) + goto fail; + + ret = i915_gem_object_get_pages(obj); + if (ret) + goto fail; + + i915_gem_object_pin_pages(obj); + sg = obj-pages; + bytes = sg_copy_from_buffer(sg-sgl, sg-nents, (void *)data, size); + i915_gem_object_unpin_pages(obj); + + if (WARN_ON(bytes != size)) { + DRM_ERROR(Incomplete copy, wrote %zu of %zu, bytes, size); + ret = -EFAULT; + goto fail; + } + + return obj; + +fail: + drm_gem_object_unreference(obj-base); + return ERR_PTR(ret); +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 03/13 v4] drm/i915: Add GuC-related header files
On Thu, Jul 09, 2015 at 07:29:04PM +0100, Dave Gordon wrote: intel_guc_fwif.h contains the subset of the GuC interface that we will need for submission of commands through the GuC. These MUST be kept in sync with the definitions used by the GuC firmware, and updates to this file will (or should) be autogenerated from the source files used to build the firmware. Editing this file is therefore not recommended. i915_guc_reg.h contains definitions of GuC-related hardware: registers, bitmasks, etc. These should match the BSpec. v2: Files renamed resliced per review comments by Chris Wilson v4: Added DON'T-EDIT-ME warning [Tom O'Rourke] Issue: VIZ-4884 Signed-off-by: Alex Dai yu@intel.com Signed-off-by: Dave Gordon david.s.gor...@intel.com --- Reviewed-by: Tom O'Rourke Tom.O'rou...@intel.com drivers/gpu/drm/i915/i915_guc_reg.h | 102 ++ drivers/gpu/drm/i915/intel_guc_fwif.h | 245 ++ 2 files changed, 347 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_guc_reg.h create mode 100644 drivers/gpu/drm/i915/intel_guc_fwif.h diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h new file mode 100644 index 000..ccdc6c8 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_guc_reg.h @@ -0,0 +1,102 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ +#ifndef _I915_GUC_REG_H_ +#define _I915_GUC_REG_H_ + +/* Definitions of GuC H/W registers, bits, etc */ + +#define GUC_STATUS 0xc000 +#define GS_BOOTROM_SHIFT 1 +#define GS_BOOTROM_MASK (0x7F GS_BOOTROM_SHIFT) +#define GS_BOOTROM_RSA_FAILED(0x50 GS_BOOTROM_SHIFT) +#define GS_UKERNEL_SHIFT 8 +#define GS_UKERNEL_MASK (0xFF GS_UKERNEL_SHIFT) +#define GS_UKERNEL_LAPIC_DONE(0x30 GS_UKERNEL_SHIFT) +#define GS_UKERNEL_DPC_ERROR (0x60 GS_UKERNEL_SHIFT) +#define GS_UKERNEL_READY (0xF0 GS_UKERNEL_SHIFT) +#define GS_MIA_SHIFT 16 +#define GS_MIA_MASK (0x07 GS_MIA_SHIFT) + +#define GUC_WOPCM_SIZE 0xc050 +#define GUC_WOPCM_SIZE_VALUE (0x80 12) /* 512KB */ +#define GUC_WOPCM_OFFSET 0x8 /* 512KB */ + +#define SOFT_SCRATCH(n) (0xc180 + ((n) * 4)) + +#define UOS_RSA_SCRATCH_00xc200 +#define DMA_ADDR_0_LOW 0xc300 +#define DMA_ADDR_0_HIGH 0xc304 +#define DMA_ADDR_1_LOW 0xc308 +#define DMA_ADDR_1_HIGH 0xc30c +#define DMA_ADDRESS_SPACE_WOPCM (7 16) +#define DMA_ADDRESS_SPACE_GTT(8 16) +#define DMA_COPY_SIZE0xc310 +#define DMA_CTRL 0xc314 +#define UOS_MOVE (14) +#define START_DMA(10) +#define DMA_GUC_WOPCM_OFFSET 0xc340 + +#define GEN8_GT_PM_CONFIG0x138140 +#define GEN9_GT_PM_CONFIG0x13816c +#define GEN8_GT_DOORBELL_ENABLE (10) + +#define GEN8_GTCR0x4274 +#define GEN8_GTCR_INVALIDATE (10) + +#define GUC_ARAT_C6DIS 0xA178 + +#define GUC_SHIM_CONTROL 0xc064 +#define GUC_DISABLE_SRAM_INIT_TO_ZEROES(10) +#define GUC_ENABLE_READ_CACHE_LOGIC(11) +#define GUC_ENABLE_MIA_CACHING (12) +#define 
GUC_GEN10_MSGCH_ENABLE (14) +#define GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA(19) +#define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA (110) +#define GUC_ENABLE_MIA_CLOCK_GATING(115) +#define GUC_GEN10_SHIM_WC_ENABLE
Re: [Intel-gfx] [PATCH 02/13 v4] drm/i915: Add GuC-related module parameters
On Thu, Jul 09, 2015 at 07:29:03PM +0100, Dave Gordon wrote: From: Alex Dai yu@intel.com Two new module parameters: enable_guc_submission which will turn on submission of batchbuffers via the GuC (when implemented), and guc_log_level which controls the level of debugging logged by the GuC and captured by the host. Signed-off-by: Alex Dai yu@intel.com v4: Mark enable_guc_submission unsafe [Daniel Vetter] Signed-off-by: Dave Gordon david.s.gor...@intel.com --- Reviewed-by: Tom O'Rourke Tom.O'rou...@intel.com drivers/gpu/drm/i915/i915_drv.h| 2 ++ drivers/gpu/drm/i915/i915_params.c | 9 + 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3c91507..4a512da 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2606,6 +2606,8 @@ struct i915_params { bool reset; bool disable_display; bool disable_vtd_wa; + bool enable_guc_submission; + int guc_log_level; int use_mmio_flip; int mmio_debug; bool verbose_state_checks; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 7983fe4..2791b5a 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -53,6 +53,8 @@ struct i915_params i915 __read_mostly = { .verbose_state_checks = 1, .nuclear_pageflip = 0, .edp_vswing = 0, + .enable_guc_submission = false, + .guc_log_level = -1, }; module_param_named(modeset, i915.modeset, int, 0400); @@ -186,3 +188,10 @@ MODULE_PARM_DESC(edp_vswing, Ignore/Override vswing pre-emph table selection from VBT (0=use value from vbt [default], 1=low power swing(200mV), 2=default swing(400mV))); + +module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, bool, 0400); +MODULE_PARM_DESC(enable_guc_submission, Enable GuC submission (default:false)); + +module_param_named(guc_log_level, i915.guc_log_level, int, 0400); +MODULE_PARM_DESC(guc_log_level, + GuC firmware logging level (-1:disabled (default), 0-3:enabled)); -- 
1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 00/13 v4] Batch submission via GuC
On Thu, Jul 09, 2015 at 07:29:01PM +0100, Dave Gordon wrote: This patch series enables command submission via the GuC. In this mode, instead of the host CPU driving the execlist port directly, it hands over work items to the GuC, using a doorbell mechanism to tell the GuC that new items have been added to its work queue. The GuC then dispatches contexts to the various GPU engines, and manages the resulting context- switch interrupts. Completion of a batch is however still signalled to the CPU; the GuC is not involved in handling user interrupts. There are two subsequences within the patch series: drm/i915: Add i915_gem_object_create_from_data() drm/i915: Add GuC-related module parameters drm/i915: Add GuC-related header files drm/i915: GuC-specific firmware loader drm/i915: Debugfs interface to read GuC load status These five patches make up the GuC loader and its prerequisites. At this point in the sequence we can load and activate the GuC firmware, but not submit any batches through it. (This is nonetheless a potentially useful state, as the GuC could do other useful work even when not handling batch submissions). drm/i915: Expose two LRC functions for GuC submission mode drm/i915: GuC submission setup, phase 1 drm/i915: Enable GuC firmware log drm/i915: Implementation of GuC client drm/i915: Interrupt routing for GuC submission drm/i915: Integrate GuC-based command submission drm/i915: Debugfs interface for GuC submission statistics drm/i915: Enable GuC submission, where supported In this second section, we implement the GuC submission mechanism, link it into the (execlist-based) submission path, and finally enable it (on supported platforms). On platforms where there is no GuC, or if GuC submission is explicitly disabled, batch submission will revert to using the execlist mechanism directly. On the other hand, if the GuC firmware cannot be found or is invalid, the GPU will be unusable. 
The GuC firmware itself is not included in this patchset; it is or will be available for download from https://01.org/linuxgraphics/downloads/ This driver works with and requires GuC firmware revision 3.x. It will not work with any firmware version 1.x, as the GuC protocol in those revisions was incompatible and is no longer supported. [TOR:] I finished reviewing the first 5 patches for GuC firmware loading. These patches look ready to go. Should we wait until the GuC version 3 firmware is available from 01.org before merging? I am still working on the second section for GuC submission. Thanks, Tom Ben Widawsky (0): Vinit Azad (0): Michael H. Nguyen (0): created the original versions on which some of these patches are based. Alex Dai (6): drm/i915: Add GuC-related module parameters drm/i915: GuC-specific firmware loader drm/i915: Debugfs interface to read GuC load status drm/i915: GuC submission setup, phase 1 drm/i915: Enable GuC firmware log drm/i915: Integrate GuC-based command submission Dave Gordon (7): drm/i915: Add i915_gem_object_create_from_data() drm/i915: Add GuC-related header files drm/i915: Expose two LRC functions for GuC submission mode drm/i915: Implementation of GuC client drm/i915: Interrupt routing for GuC submission drm/i915: Debugfs interface for GuC submission statistics drm/i915: Enable GuC submission, where supported Documentation/DocBook/drm.tmpl | 14 + drivers/gpu/drm/i915/Makefile | 4 + drivers/gpu/drm/i915/i915_debugfs.c| 110 +++- drivers/gpu/drm/i915/i915_dma.c| 4 + drivers/gpu/drm/i915/i915_drv.h| 15 + drivers/gpu/drm/i915/i915_gem.c| 53 ++ drivers/gpu/drm/i915/i915_guc_reg.h| 102 drivers/gpu/drm/i915/i915_guc_submission.c | 853 + drivers/gpu/drm/i915/i915_params.c | 9 + drivers/gpu/drm/i915/i915_reg.h| 15 +- drivers/gpu/drm/i915/intel_guc.h | 118 drivers/gpu/drm/i915/intel_guc_fwif.h | 245 + drivers/gpu/drm/i915/intel_guc_loader.c| 618 + drivers/gpu/drm/i915/intel_lrc.c | 72 ++- drivers/gpu/drm/i915/intel_lrc.h | 9 + 15 files 
changed, 2211 insertions(+), 30 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_guc_reg.h create mode 100644 drivers/gpu/drm/i915/i915_guc_submission.c create mode 100644 drivers/gpu/drm/i915/intel_guc.h create mode 100644 drivers/gpu/drm/i915/intel_guc_fwif.h create mode 100644 drivers/gpu/drm/i915/intel_guc_loader.c -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] Updated drm-intel-testing
Hi all, New -testing cycle with cool stuff: - prelim hw support dropped for skl after Damien fixed an ABI issue around planes - legacy modesetting is done using atomic infrastructure now (Maarten)! - more gen9 workarounds (ArunNick) - MOCS programming (cache control for better performance) for skl/bxt - vlv/chv dpll improvements (Ville) - PSR fixes from Rodrigo - fbc improvements from Paulo - plumb requests into execlist submit functions (Mika) - opregion code cleanup from Jani - resource streamer support from Abdiel for mesa - final fixes for 12bpc hdmi + enabling support from Ville Happy testing! Cheers, Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2 2/4] drm/i915: Add provision to extend Golden context batch
On Fri, Jul 17, 2015 at 07:13:32PM +0100, Arun Siluvery wrote: The Golden batch carries 3D state at the beginning so that HW starts with a known state. It is carried as a binary blob which is auto-generated from source. The idea was it would be easier to maintain and keep the complexity out of the kernel which makes sense as we don't really touch it. However if you really need to update it then you need to update generator source and keep the binary blob in sync with it. There is a need to patch this in bxt to send one additional command to enable a feature. A solution was to patch the binary data with some additional data structures (included as part of auto-generator source) but it was unnecessarily complicated. Chris suggested the idea of having a secondary batch and execute two batch buffers. It has clear advantages as we needn't touch the base golden batch, can customize secondary/auxiliary batch depending on Gen and can be carried in the driver with no dependencies. This patch adds support for this auxiliary batch which is inserted at the end of golden batch and is completely independent from it. Thanks to Mika for the preliminary review. v2: Strictly conform to the batch size requirements to cover Gen2 and add comments to clarify overflow check in macro (Chris, Mika). Cc: Mika Kuoppala mika.kuopp...@intel.com Cc: Chris Wilson ch...@chris-wilson.co.uk Cc: Armin Reese armin.c.re...@intel.com Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_render_state.c | 45 drivers/gpu/drm/i915/i915_gem_render_state.h | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 6 3 files changed, 53 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index b6492fe..5026a62 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -73,6 +73,24 @@ free_gem: return ret; } +/* + * Macro to add commands to auxiliary batch. 
+ * This macro only checks for page overflow before inserting the commands, + * this is sufficient as the null state generator makes the final batch + * with two passes to build command and state separately. At this point + * the size of both are known and it compacts them by relocating the state + * right after the commands taking care of alignment so we should have sufficient + * space below them for adding new commands. + */ +#define OUT_BATCH(batch, i, val) \ + do {\ + if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) { \ + ret = -ENOSPC; \ + goto err_out; \ + } \ + (batch)[(i)++] = (val); \ + } while(0) + static int render_state_setup(struct render_state *so) { const struct intel_renderstate_rodata *rodata = so->rodata; @@ -110,6 +128,21 @@ static int render_state_setup(struct render_state *so) d[i++] = s; } + + while (i % CACHELINE_DWORDS) + OUT_BATCH(d, i, MI_NOOP); + + so->aux_batch_offset = i * sizeof(u32); + + OUT_BATCH(d, i, MI_BATCH_BUFFER_END); + so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset; + + /* + * Since we are sending length, we need to strictly conform to + * all requirements. For Gen2 this must be a multiple of 8.
+ */ + so->aux_batch_size = ALIGN(so->aux_batch_size, 8); + kunmap(page); ret = i915_gem_object_set_to_gtt_domain(so->obj, false); @@ -128,6 +161,8 @@ err_out: return ret; } +#undef OUT_BATCH + void i915_gem_render_state_fini(struct render_state *so) { i915_gem_object_ggtt_unpin(so->obj); @@ -176,6 +211,16 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) if (ret) goto out; + if (so.aux_batch_size > 8) { + ret = req->ring->dispatch_execbuffer(req, + (so.ggtt_offset + + so.aux_batch_offset), + so.aux_batch_size, + I915_DISPATCH_SECURE); + if (ret) + goto out; + } + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); out: diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 7aa7372..79de101 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -37,6 +37,8 @@ struct render_state { struct drm_i915_gem_object *obj; u64 ggtt_offset;
Re: [Intel-gfx] [PATCH] drm/i915/bxt: WA for swapped HPD pins in A stepping
On Fri, 2015-07-17 at 13:47 +0530, Sonika Jindal wrote: As per bspec, on BXT A0/A1, sw needs to activate DDIA HPD logic and interrupts to check the external panel connection and DDIC HPD logic for edp panel. v2: For DP, irq_port is used to determine the encoder instead of hpd_pin and removing the edp HPD logic because port A HPD is not present (Imre) Signed-off-by: Sonika Jindal sonika.jin...@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 10 +- drivers/gpu/drm/i915/intel_hdmi.c |9 - 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index e2c6f73..777e3a3 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -3225,7 +3225,15 @@ void intel_ddi_init(struct drm_device *dev, enum port port) goto err; intel_dig_port->hpd_pulse = intel_dp_hpd_pulse; - dev_priv->hotplug.irq_port[port] = intel_dig_port; + /* + * On BXT A0/A1, sw needs to activate DDIA HPD logic and + * interrupts to check the external panel connection. + */ + if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0) && + port == PORT_B) + dev_priv->hotplug.irq_port[PORT_A] = intel_dig_port; This happens to work but is confusing. irq_port[PORT_A] will be set here already and the above will simply overwrite it without explanation. I would also handle the port == PORT_A case and not set irq_port for it. The same swapping for hpd_pin is missing from intel_dp_init_connector().
+ else + dev_priv->hotplug.irq_port[port] = intel_dig_port; } /* In theory we don't need the encoder->type check, but leave it just in diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 70bad5b..94fa716 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1973,7 +1973,14 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, intel_hdmi->ddc_bus = GMBUS_PIN_1_BXT; else intel_hdmi->ddc_bus = GMBUS_PIN_DPB; - intel_encoder->hpd_pin = HPD_PORT_B; + /* + * On BXT A0/A1, sw needs to activate DDIA HPD logic and + * interrupts to check the external panel connection. + */ + if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0)) + intel_encoder->hpd_pin = HPD_PORT_A; + else + intel_encoder->hpd_pin = HPD_PORT_B; break; case PORT_C: if (IS_BROXTON(dev_priv)) As I earlier pointed out with the above approach, you need to add support for HPD events on the HPD_PORT_A pin. If you look at the for_each_hpd_pin() macro and intel_hpd_irq_handler()/is_dig_port you'll notice that any interrupt event on the HPD_PORT_A pin will be ignored now. --Imre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 05/13 v4] drm/i915: Debugfs interface to read GuC load status
On Thu, Jul 09, 2015 at 07:29:06PM +0100, Dave Gordon wrote: From: Alex Dai yu@intel.com The new node provides access to the status of the GuC-specific loader; also the scratch registers used for communication between the i915 driver and the GuC firmware. v2: Changes to output formats per Chris Wilson's suggestions v4: Rebased Issue: VIZ-4884 Signed-off-by: Alex Dai yu@intel.com Signed-off-by: Dave Gordon david.s.gor...@intel.com --- Reviewed-by: Tom O'Rourke Tom.O'rou...@intel.com drivers/gpu/drm/i915/i915_debugfs.c | 39 + 1 file changed, 39 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 98fd3c9..9ff5f17 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2359,6 +2359,44 @@ static int i915_llc(struct seq_file *m, void *data) return 0; } +static int i915_guc_load_status_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_i915_private *dev_priv = node->minor->dev->dev_private; + struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; + u32 tmp, i; + + if (!HAS_GUC_UCODE(dev_priv->dev)) + return 0; + + seq_printf(m, "GuC firmware status:\n"); + seq_printf(m, "\tpath: %s\n", + guc_fw->guc_fw_path); + seq_printf(m, "\tfetch: %s\n", + intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status)); + seq_printf(m, "\tload: %s\n", + intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); + seq_printf(m, "\tversion wanted: %d.%d\n", + guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted); + seq_printf(m, "\tversion found: %d.%d\n", + guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found); + + tmp = I915_READ(GUC_STATUS); + + seq_printf(m, "\nGuC status 0x%08x:\n", tmp); + seq_printf(m, "\tBootrom status = 0x%x\n", + (tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT); + seq_printf(m, "\tuKernel status = 0x%x\n", + (tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT); + seq_printf(m, "\tMIA Core status = 0x%x\n", + (tmp & GS_MIA_MASK) >> GS_MIA_SHIFT); + seq_puts(m, "\nScratch registers:\n"); + for (i 
= 0; i < 16; i++) + seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i))); + + return 0; +} + static int i915_edp_psr_status(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; @@ -5073,6 +5111,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_gem_hws_bsd", i915_hws_info, 0, (void *)VCS}, {"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS}, {"i915_gem_batch_pool", i915_gem_batch_pool_info, 0}, + {"i915_guc_load_status", i915_guc_load_status_info, 0}, {"i915_frequency_info", i915_frequency_info, 0}, {"i915_hangcheck_info", i915_hangcheck_info, 0}, {"i915_drpc_info", i915_drpc_info, 0}, -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx