[Intel-gfx] [RFC 7/9] drm/i915: Interrupt driven fences

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The intended usage model for struct fence is that the signalled status should be
set on demand rather than polled. That is, there should not be a need for a
'signaled' function to be called every time the status is queried. Instead,
'something' should be done to enable a signal callback from the hardware which
will update the state directly. In the case of requests, this is the seqno
update interrupt. The idea is that this callback will only be enabled on demand
when something actually tries to wait on the fence.

This change removes the polling test and replaces it with the callback scheme.
Each fence is added to a 'please poke me' list at the start of
i915_add_request(). The interrupt handler then scans through the 'poke me' list
when a new seqno pops out and signals any matching fence/request. The fence is
then removed from the list so the entire request stack does not need to be
scanned every time. Note that the fence is added to the list before the commands
to generate the seqno interrupt are added to the ring. Thus the sequence is
guaranteed to be race free if the interrupt is already enabled.
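By way of illustration, the submit-side hook might look like the sketch below.
The helper is declared in this patch; the body here is illustrative only, and
the per-ring 'fence_lock' spinlock name is an assumption rather than something
shown in the diff.

void i915_gem_request_submit(struct drm_i915_gem_request *req)
{
	unsigned long flags;

	/*
	 * Add to the 'please poke me' list *before* the commands that
	 * write the seqno and raise the interrupt reach the ring, so a
	 * request can never complete without being on the list.
	 */
	spin_lock_irqsave(&req->ring->fence_lock, flags);	/* assumed name */
	list_add_tail(&req->signal_list, &req->ring->fence_signal_list);
	spin_unlock_irqrestore(&req->ring->fence_lock, flags);
}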

Note that the interrupt is only enabled on demand (i.e. when __wait_request() is
called). Thus there is still a potential race when enabling the interrupt as the
request may already have completed. However, this is simply solved by calling
the interrupt processing code immediately after enabling the interrupt and
thereby checking for already completed requests.
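A sketch of that enable path, using the helpers declared in this patch
(illustrative rather than the literal implementation):

void i915_gem_request_enable_interrupt(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *ring = req->ring;

	if (req->irq_enabled)
		return;

	WARN_ON(!ring->irq_get(ring));
	req->irq_enabled = true;

	/*
	 * The request may have completed before the interrupt was
	 * enabled, so scan the signal list now rather than waiting for
	 * an interrupt that will never come.
	 */
	i915_gem_request_notify(ring);
}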

Lastly, the ring clean-up code may need to cancel outstanding requests (e.g.
because TDR has reset the ring). These requests will never be signalled and so
must be removed from the signal list manually. This is done by setting a
'cancelled' flag and then running the regular notify/retire code path rather
than attempting to duplicate the list manipulation and clean-up code in
multiple places. This also avoids any race condition where the cancellation
might occur after/during the arrival of the completion interrupt.
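The notify loop with the 'cancelled' flag folded in then takes roughly the
following shape (names are from this series; the exact body is illustrative):

	u32 seqno = ring->get_seqno(ring, false);

	list_for_each_entry_safe(req, req_next, &ring->fence_signal_list,
				 signal_list) {
		if (!req->cancelled) {
			/* Not yet completed? Leave it for the next interrupt. */
			if (!i915_seqno_passed(seqno, req->seqno))
				continue;

			fence_signal_locked(&req->fence);
		}

		/* Completed or cancelled: either way it leaves the list. */
		list_del_init(&req->signal_list);
		if (req->irq_enabled) {
			ring->irq_put(ring);
			req->irq_enabled = false;
		}
	}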

v2: Updated to take advantage of the request unreference no longer requiring the
mutex lock.

For: VIZ-5190
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h |   8 ++
 drivers/gpu/drm/i915/i915_gem.c | 132 +---
 drivers/gpu/drm/i915/i915_irq.c |   2 +
 drivers/gpu/drm/i915/intel_lrc.c|   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
 6 files changed, 136 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 61c3db2..d7f1aa5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2163,7 +2163,11 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 struct drm_i915_gem_request {
/** Underlying object for implementing the signal/wait stuff. */
struct fence fence;
+   struct list_head signal_list;
+   struct list_head unsignal_list;
struct list_head delay_free_list;
+   bool cancelled;
+   bool irq_enabled;
 
/** On Which ring this request was generated */
struct drm_i915_private *i915;
@@ -2241,6 +2245,10 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
   struct drm_i915_gem_request **req_out);
 void i915_gem_request_cancel(struct drm_i915_gem_request *req);
 
+void i915_gem_request_submit(struct drm_i915_gem_request *req);
+void i915_gem_request_enable_interrupt(struct drm_i915_gem_request *req);
+void i915_gem_request_notify(struct intel_engine_cs *ring);
+
 int i915_create_fence_timeline(struct drm_device *dev,
   struct intel_context *ctx,
   struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 482835a..7c589a9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1222,6 +1222,11 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	if (list_empty(&req->list))
 		return 0;
 
+	/*
+	 * Enable interrupt completion of the request.
+	 */
+	i915_gem_request_enable_interrupt(req);
+
 	if (i915_gem_request_completed(req))
 		return 0;
 
@@ -1382,6 +1387,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	list_del_init(&request->list);
 	i915_gem_request_remove_from_client(request);
 
+	/* In case the request is still in the signal pending list */
+	if (!list_empty(&request->signal_list))
+		request->cancelled = true;
+
 	i915_gem_request_unreference(request);
 }
 
@@ -2534,6 +2543,12 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	 */
 	request->postfix = intel_ring_get_tail(ringbuf);
 
+   /*
+* Add the 

[Intel-gfx] [RFC 6/9] drm/i915: Delay the freeing of requests until retire time

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The request structure is reference counted. When the count reached
zero, the request was immediately freed and all associated objects
were unreferenced/unallocated. This meant that the driver mutex lock
must be held at the point where the count reaches zero. This was fine
while all references were held internally to the driver. However, the
plan is to allow the underlying fence object (and hence the request
itself) to be returned to other drivers and to userland. External
users cannot be expected to acquire a driver private mutex lock.

Rather than attempt to disentangle the request structure from the
driver mutex lock, the decision was to defer the free code until a
later (safer) point. Hence this patch changes the unreference callback
to merely move the request onto a delayed free list. The driver's
retire worker thread will then process the list and actually call the
free function on the requests.
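The retire-side half of the scheme might look like the sketch below; the
function name and exact drain point are assumptions, while the list and lock
fields are the ones added by this patch:

static void i915_gem_request_free_delayed(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *req, *req_next;
	unsigned long flags;
	LIST_HEAD(free_list);

	/* Detach the whole list under the IRQ-safe lock, free at leisure */
	spin_lock_irqsave(&ring->delayed_free_lock, flags);
	list_splice_init(&ring->delayed_free_list, &free_list);
	spin_unlock_irqrestore(&ring->delayed_free_lock, flags);

	list_for_each_entry_safe(req, req_next, &free_list, delay_free_list) {
		list_del(&req->delay_free_list);
		/* The retire worker already holds struct_mutex */
		i915_gem_request_free(req);
	}
}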

[new patch in series]

For: VIZ-5190
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h | 22 +++---
 drivers/gpu/drm/i915/i915_gem.c | 41 +
 drivers/gpu/drm/i915/intel_display.c|  2 +-
 drivers/gpu/drm/i915/intel_lrc.c|  2 ++
 drivers/gpu/drm/i915/intel_pm.c |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.h |  4 
 7 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 88a4746..61c3db2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2161,14 +2161,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
  * initial reference taken using kref_init
  */
 struct drm_i915_gem_request {
-   /**
-* Underlying object for implementing the signal/wait stuff.
-* NB: Never return this fence object to user land! It is unsafe to
-* let anything outside of the i915 driver get hold of the fence
-* object as the clean up when decrementing the reference count
-* requires holding the driver mutex lock.
-*/
+   /** Underlying object for implementing the signal/wait stuff. */
struct fence fence;
+   struct list_head delay_free_list;
 
/** On Which ring this request was generated */
struct drm_i915_private *i915;
@@ -2281,21 +2276,10 @@ i915_gem_request_reference(struct drm_i915_gem_request *req)
 static inline void
 i915_gem_request_unreference(struct drm_i915_gem_request *req)
 {
-	WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex));
-	fence_put(&req->fence);
-}
-
-static inline void
-i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req)
-{
-	struct drm_device *dev;
-
 	if (!req)
 		return;
 
-	dev = req->ring->dev;
-	if (kref_put_mutex(&req->fence.refcount, fence_release, &dev->struct_mutex))
-		mutex_unlock(&dev->struct_mutex);
+	fence_put(&req->fence);
 }
 
 static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index af79716..482835a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2616,10 +2616,27 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
 	}
 }
 
-static void i915_gem_request_free(struct fence *req_fence)
+static void i915_gem_request_release(struct fence *req_fence)
 {
 	struct drm_i915_gem_request *req = container_of(req_fence,
 						 typeof(*req), fence);
+	struct intel_engine_cs *ring = req->ring;
+	struct drm_i915_private *dev_priv = to_i915(ring->dev);
+	unsigned long flags;
+
+	/*
+	 * Need to add the request to a deferred dereference list to be
+	 * processed at a mutex lock safe time.
+	 */
+	spin_lock_irqsave(&ring->delayed_free_lock, flags);
+	list_add_tail(&req->delay_free_list, &ring->delayed_free_list);
+	spin_unlock_irqrestore(&ring->delayed_free_lock, flags);
+
+	queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
+}
+
+static void i915_gem_request_free(struct drm_i915_gem_request *req)
+{
 	struct intel_context *ctx = req->ctx;
 
 	BUG_ON(!mutex_is_locked(&req->ring->dev->struct_mutex));
@@ -2696,7 +2713,7 @@ static const struct fence_ops i915_gem_request_fops = {
 	.enable_signaling	= i915_gem_request_enable_signaling,
 	.signaled		= i915_gem_request_is_completed,
 	.wait			= fence_default_wait,
-	.release		= i915_gem_request_free,
+	.release		= i915_gem_request_release,
 	.fence_value_str	= i915_fence_value_str,
 	.timeline_value_str	= i915_fence_timeline_value_str,
 };
@@ -2992,6 +3009,21 @@ 

[Intel-gfx] [RFC 0/9] Convert requests to use struct fence

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

There is a construct in the linux kernel called 'struct fence' that is intended
to keep track of work that is executed on hardware. I.e. it solves the basic
problem that the driver's 'struct drm_i915_gem_request' is trying to address. The
request structure does quite a lot more than simply track the execution progress
so is very definitely still required. However, the basic completion status side
could be updated to use the ready-made fence implementation and gain all the
advantages that provides.

Using the struct fence object also has the advantage that the fence can be used
outside of the i915 driver (by other drivers or by userland applications). That
is the basis of the dma-buf synchronisation API and allows asynchronous
tracking of work completion. In this case, it allows applications to be
signalled directly when a batch buffer completes without having to make an IOCTL
call into the driver.
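For illustration, a userland waiter then needs nothing more than poll(2) on the
file descriptor wrapping the fence (the sync framework signals POLLIN on
completion; the fd here is assumed to be the sync point returned by the
execbuffer extension in patch 9/9):

#include <errno.h>
#include <poll.h>

/* Returns 0 once the batch has completed, -ETIME on timeout. */
static int wait_for_batch(int fence_fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fence_fd, .events = POLLIN };
	int ret;

	ret = poll(&pfd, 1, timeout_ms);
	if (ret < 0)
		return -errno;

	return ret ? 0 : -ETIME;
}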

This is work that was planned since the conversion of the driver from being
seqno value based to being request structure based. This patch series does that
work.

[Patches against drm-intel-nightly tree fetched 15/07/2015]

John Harrison (7):
  drm/i915: Convert requests to use struct fence
  drm/i915: Removed now redundant parameter to i915_gem_request_completed()
  drm/i915: Add per context timelines to fence object
  drm/i915: Delay the freeing of requests until retire time
  drm/i915: Interrupt driven fences
  drm/i915: Updated request structure tracing
  drm/i915: Add sync framework support to execbuff IOCTL

Maarten Lankhorst (1):
  android: add sync_fence_create_dma

Tvrtko Ursulin (1):
  staging/android/sync: Support sync points created from dma-fences

 drivers/gpu/drm/i915/i915_debugfs.c|   2 +-
 drivers/gpu/drm/i915/i915_drv.h|  73 +++---
 drivers/gpu/drm/i915/i915_gem.c| 369 +++--
 drivers/gpu/drm/i915/i915_gem_context.c|  15 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  90 ++-
 drivers/gpu/drm/i915/i915_irq.c|   2 +-
 drivers/gpu/drm/i915/i915_trace.h  |   7 +-
 drivers/gpu/drm/i915/intel_display.c   |   4 +-
 drivers/gpu/drm/i915/intel_lrc.c   |  12 +
 drivers/gpu/drm/i915/intel_pm.c|   6 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c|   4 +
 drivers/gpu/drm/i915/intel_ringbuffer.h|   7 +
 drivers/staging/android/sync.c |  13 +-
 drivers/staging/android/sync.h |  12 +-
 drivers/staging/android/sync_debug.c   |  42 ++--
 include/uapi/drm/i915_drm.h|  16 +-
 16 files changed, 583 insertions(+), 91 deletions(-)

-- 
1.9.1



[Intel-gfx] [RFC 9/9] drm/i915: Add sync framework support to execbuff IOCTL

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

Various projects desire a mechanism for managing dependencies between
work items asynchronously. This can also include work items across
completely different and independent systems. For example, an
application wants to retrieve a frame from a video-in device, use it
for rendering on a GPU, then send it to the video-out device for
display, all without having to stall waiting for completion along
the way. The sync framework allows this. It encapsulates
synchronisation events in file descriptors. The application can
request a sync point for the completion of each piece of work. Drivers
should also take sync points in with each new work request and not
schedule the work to start until the sync has been signalled.

This patch adds sync framework support to the exec buffer IOCTL. A
sync point can be passed in to stall execution of the batch buffer
until signalled. And a sync point can be returned after each batch
buffer submission which will be signalled upon that batch buffer's
completion.

At present, the input sync point is simply waited on synchronously
inside the exec buffer IOCTL call. Once the GPU scheduler arrives,
this will be handled asynchronously inside the scheduler and the IOCTL
can return without having to wait.
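A sketch of that synchronous input-side wait, using the staging sync API and
the helper declared in this patch; the fd plumbing from the execbuffer
arguments is elided/assumed here:

#ifdef CONFIG_SYNC
	/* 'in_fd' is the sync point handed in by userland */
	struct sync_fence *in_fence = sync_fence_fdget(in_fd);
	int ret = 0;

	if (!in_fence)
		return -EINVAL;

	/* No need to wait if every point on the fence is this ring's own work */
	if (!i915_safe_to_ignore_fence(ring, in_fence))
		ret = sync_fence_wait(in_fence, 10000);	/* timeout in ms */

	sync_fence_put(in_fence);
	if (ret)
		return ret;
#endif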

Note also that the scheduler will re-order the execution of batch
buffers, e.g. because a batch buffer is stalled on a sync point and
cannot be submitted yet but other, independent, batch buffers are
being presented to the driver. This means that the timeline within the
sync points returned cannot be global to the engine. Instead they must
be kept per context per engine (the scheduler may not re-order batches
within a context). Hence the timeline cannot be based on the existing
seqno values but must be a new implementation.

This patch is a port of work by several people that has been pulled
across from Android. It has been updated several times across several
patches. Rather than attempt to port each individual patch, this
version is the finished product as a single patch. The various
contributors/authors along the way (in addition to myself) were:
  Satyanantha RamaGopal M rama.gopal.m.satyanan...@intel.com
  Tvrtko Ursulin tvrtko.ursu...@intel.com
  Michel Thierry michel.thie...@intel.com
  Arun Siluvery arun.siluv...@linux.intel.com

[new patch in series]

For: VIZ-5190
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h|  6 ++
 drivers/gpu/drm/i915/i915_gem.c| 84 
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 90 --
 include/uapi/drm/i915_drm.h| 16 +-
 4 files changed, 188 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d7f1aa5..cf6b7cd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2168,6 +2168,7 @@ struct drm_i915_gem_request {
struct list_head delay_free_list;
bool cancelled;
bool irq_enabled;
+   bool fence_external;
 
/** On Which ring this request was generated */
struct drm_i915_private *i915;
@@ -2252,6 +2253,11 @@ void i915_gem_request_notify(struct intel_engine_cs *ring);
 int i915_create_fence_timeline(struct drm_device *dev,
 			       struct intel_context *ctx,
 			       struct intel_engine_cs *ring);
+#ifdef CONFIG_SYNC
+struct sync_fence;
+int i915_create_sync_fence(struct drm_i915_gem_request *req, int *fence_fd);
+bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *fence);
+#endif
 
 static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3f20087..de93422 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -37,6 +37,9 @@
 #include <linux/swap.h>
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
+#ifdef CONFIG_SYNC
+#include "../drivers/staging/android/sync.h"
+#endif
 
 #define RQ_BUG_ON(expr)
 
@@ -2549,6 +2552,15 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	 */
 	i915_gem_request_submit(request);
 
+	/*
+	 * If an external sync point has been requested for this request then
+	 * it can be waited on without the driver's knowledge, i.e. without
+	 * calling __i915_wait_request(). Thus interrupts must be enabled
+	 * from the start rather than only on demand.
+	 */
+	if (request->fence_external)
+		i915_gem_request_enable_interrupt(request);
+
 	if (i915.enable_execlists)
 		ret = ring->emit_request(request);
 	else {
@@ -2857,6 +2869,78 @@ static uint32_t i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *t
return seqno;
 }
 
+#ifdef CONFIG_SYNC
+int i915_create_sync_fence(struct 

[Intel-gfx] [RFC 3/9] drm/i915: Convert requests to use struct fence

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

There is a construct in the linux kernel called 'struct fence' that is intended
to keep track of work that is executed on hardware. I.e. it solves the basic
problem that the driver's 'struct drm_i915_gem_request' is trying to address. The
request structure does quite a lot more than simply track the execution progress
so is very definitely still required. However, the basic completion status side
could be updated to use the ready-made fence implementation and gain all the
advantages that provides.

This patch makes the first step of integrating a struct fence into the request.
It replaces the explicit reference count with that of the fence. It also
replaces the 'is completed' test with the fence's equivalent. Currently, that
simply chains on to the original request implementation. A future patch will
improve this.
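The conversion point itself is small. A sketch of what replaces kref_init() at
request-creation time, where fence_init() is the standard linux/fence.h
initialiser and the per-ring fence_lock/fence_context names are assumptions:

static void i915_gem_request_init_fence(struct drm_i915_gem_request *req,
					struct intel_engine_cs *ring)
{
	/*
	 * The fence's refcount now *is* the request's refcount; the ops
	 * table supplies the completion test and the release hook.
	 */
	fence_init(&req->fence, &i915_gem_request_fops, &ring->fence_lock,
		   ring->fence_context, req->seqno);
}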

For: VIZ-5190
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h | 45 +
 drivers/gpu/drm/i915/i915_gem.c | 58 ++---
 drivers/gpu/drm/i915/intel_lrc.c|  1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |  1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
 5 files changed, 80 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cf6761c..79d346c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -50,6 +50,7 @@
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
+#include <linux/fence.h>
 
 /* General customization:
  */
@@ -2150,7 +2151,17 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
  * initial reference taken using kref_init
  */
 struct drm_i915_gem_request {
-   struct kref ref;
+   /**
+* Underlying object for implementing the signal/wait stuff.
+* NB: Never call fence_later() or return this fence object to user
+* land! Due to lazy allocation, scheduler re-ordering, pre-emption,
+* etc., there is no guarantee at all about the validity or
+* sequentiality of the fence's seqno! It is also unsafe to let
+* anything outside of the i915 driver get hold of the fence object
+* as the clean up when decrementing the reference count requires
+* holding the driver mutex lock.
+*/
+   struct fence fence;
 
/** On Which ring this request was generated */
struct drm_i915_private *i915;
@@ -2227,7 +2238,13 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
   struct intel_context *ctx,
   struct drm_i915_gem_request **req_out);
 void i915_gem_request_cancel(struct drm_i915_gem_request *req);
-void i915_gem_request_free(struct kref *req_ref);
+
+static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
+ bool lazy_coherency)
+{
+	return fence_is_signaled(&req->fence);
+}
+
 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
   struct drm_file *file);
 
@@ -2247,7 +2264,7 @@ static inline struct drm_i915_gem_request *
 i915_gem_request_reference(struct drm_i915_gem_request *req)
 {
 	if (req)
-		kref_get(&req->ref);
+		fence_get(&req->fence);
 	return req;
 }
 
@@ -2255,7 +2272,7 @@ static inline void
 i915_gem_request_unreference(struct drm_i915_gem_request *req)
 {
 	WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex));
-	kref_put(&req->ref, i915_gem_request_free);
+	fence_put(&req->fence);
 }
 
 static inline void
@@ -2267,7 +2284,7 @@ i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req)
 		return;
 
 	dev = req->ring->dev;
-	if (kref_put_mutex(&req->ref, i915_gem_request_free, &dev->struct_mutex))
+	if (kref_put_mutex(&req->fence.refcount, fence_release, &dev->struct_mutex))
 		mutex_unlock(&dev->struct_mutex);
 }
 
@@ -2284,12 +2301,6 @@ static inline void i915_gem_request_assign(struct 
drm_i915_gem_request **pdst,
 }
 
 /*
- * XXX: i915_gem_request_completed should be here but currently needs the
- * definition of i915_seqno_passed() which is below. It will be moved in
- * a later patch when the call to i915_seqno_passed() is obsoleted...
- */
-
-/*
  * A command that requires special handling by the command parser.
  */
 struct drm_i915_cmd_descriptor {
@@ -2851,18 +2862,6 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 	return (int32_t)(seq1 - seq2) >= 0;
 }
 
-static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
- bool lazy_coherency)
-{
-   u32 seqno;
-
-   BUG_ON(req == NULL);
-
-	seqno = req->ring->get_seqno(req->ring, lazy_coherency);
-
-	return i915_seqno_passed(seqno, req->seqno);
-}
-
 int __must_check i915_gem_get_seqno(struct 

[Intel-gfx] [RFC 5/9] drm/i915: Add per context timelines to fence object

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The fence object used inside the request structure requires a sequence number.
Although this is not used by the i915 driver itself, it could potentially be
used by non-i915 code if the fence is passed outside of the driver. This is the
intention as it allows external kernel drivers and user applications to wait on
batch buffer completion asynchronously via the dma-buf fence API.

To ensure that such external users are not confused by strange things happening
with the seqno, this patch adds in a per context timeline that can provide a
guaranteed in-order seqno value for the fence. This is safe because the
scheduler will not re-order batch buffers within a context - they are considered
to be mutually dependent.
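The allocation itself can be a trivial monotonic counter per context/engine
pair. The real function appears (truncated) in the diff below; this sketch just
shows the intended in-order, never-zero behaviour:

static uint32_t i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *timeline)
{
	uint32_t seqno = timeline->next;

	/* Reserve zero to mean 'invalid', as elsewhere in the driver */
	if (++timeline->next == 0)
		timeline->next = 1;

	return seqno;
}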

[new patch in series]

For: VIZ-5190
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h | 25 
 drivers/gpu/drm/i915/i915_gem.c | 69 ++---
 drivers/gpu/drm/i915/i915_gem_context.c | 15 ++-
 drivers/gpu/drm/i915/intel_lrc.c|  8 
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 5 files changed, 103 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0c7df46..88a4746 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -840,6 +840,15 @@ struct i915_ctx_hang_stats {
bool banned;
 };
 
+struct i915_fence_timeline {
+	unsigned	fence_context;
+	uint32_t	context;
+	uint32_t	next;
+
+	struct intel_context *ctx;
+	struct intel_engine_cs *ring;
+};
+
 /* This must match up with the value previously used for execbuf2.rsvd1. */
 #define DEFAULT_CONTEXT_HANDLE 0
 
@@ -885,6 +894,7 @@ struct intel_context {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
int pin_count;
+   struct i915_fence_timeline fence_timeline;
} engine[I915_NUM_RINGS];
 
struct list_head link;
@@ -2153,13 +2163,10 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 struct drm_i915_gem_request {
/**
 * Underlying object for implementing the signal/wait stuff.
-* NB: Never call fence_later() or return this fence object to user
-* land! Due to lazy allocation, scheduler re-ordering, pre-emption,
-* etc., there is no guarantee at all about the validity or
-* sequentiality of the fence's seqno! It is also unsafe to let
-* anything outside of the i915 driver get hold of the fence object
-* as the clean up when decrementing the reference count requires
-* holding the driver mutex lock.
+* NB: Never return this fence object to user land! It is unsafe to
+* let anything outside of the i915 driver get hold of the fence
+* object as the clean up when decrementing the reference count
+* requires holding the driver mutex lock.
 */
struct fence fence;
 
@@ -2239,6 +2246,10 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
   struct drm_i915_gem_request **req_out);
 void i915_gem_request_cancel(struct drm_i915_gem_request *req);
 
+int i915_create_fence_timeline(struct drm_device *dev,
+  struct intel_context *ctx,
+  struct intel_engine_cs *ring);
+
 static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
 {
 	return fence_is_signaled(&req->fence);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3970250..af79716 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2671,6 +2671,25 @@ static bool i915_gem_request_is_completed(struct fence *req_fence)
 	return i915_seqno_passed(seqno, req->seqno);
 }
 
+static void i915_fence_timeline_value_str(struct fence *fence, char *str, int size)
+{
+	struct drm_i915_gem_request *req;
+
+	req = container_of(fence, typeof(*req), fence);
+
+	/* Last signalled timeline value ??? */
+	snprintf(str, size, "? [%d]"/*, tl->value*/, req->ring->get_seqno(req->ring, true));
+}
+
+static void i915_fence_value_str(struct fence *fence, char *str, int size)
+{
+	struct drm_i915_gem_request *req;
+
+	req = container_of(fence, typeof(*req), fence);
+
+	snprintf(str, size, "%d [%d]", req->fence.seqno, req->seqno);
+}
+
 static const struct fence_ops i915_gem_request_fops = {
 	.get_driver_name	= i915_gem_request_get_driver_name,
 	.get_timeline_name	= i915_gem_request_get_timeline_name,
@@ -2678,8 +2697,48 @@ static const struct fence_ops i915_gem_request_fops = {
 	.signaled		= i915_gem_request_is_completed,
 	.wait			= fence_default_wait,
 	.release		= i915_gem_request_free,
+   .fence_value_str

[Intel-gfx] [RFC 8/9] drm/i915: Updated request structure tracing

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

Added the '_complete' trace event which occurs when a fence/request is signaled
as complete. Also moved the notify event from the IRQ handler code to inside the
notify function itself.

For: VIZ-5190
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c   | 3 +++
 drivers/gpu/drm/i915/i915_irq.c   | 2 --
 drivers/gpu/drm/i915/i915_trace.h | 7 +--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7c589a9..3f20087 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2752,6 +2752,8 @@ void i915_gem_request_notify(struct intel_engine_cs *ring)
u32 seqno;
LIST_HEAD(free_list);
 
+   trace_i915_gem_request_notify(ring);
+
 	if (list_empty(&ring->fence_signal_list))
 		return;
 
@@ -2764,6 +2766,7 @@ void i915_gem_request_notify(struct intel_engine_cs *ring)
 			continue;
 
 		fence_signal_locked(&req->fence);
+		trace_i915_gem_request_complete(req);
 	}
 
 	list_del_init(&req->signal_list);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index e446509..d4500cc 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -851,8 +851,6 @@ static void notify_ring(struct intel_engine_cs *ring)
if (!intel_ring_initialized(ring))
return;
 
-   trace_i915_gem_request_notify(ring);
-
i915_gem_request_notify(ring);
 
 	wake_up_all(&ring->irq_queue);
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 2f34c47..f455194 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -532,16 +532,19 @@ TRACE_EVENT(i915_gem_request_notify,
 __field(u32, dev)
 __field(u32, ring)
 __field(u32, seqno)
+__field(bool, is_empty)
 ),
 
 	    TP_fast_assign(
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
 			   __entry->seqno = ring->get_seqno(ring, false);
+			   __entry->is_empty = list_empty(&ring->fence_signal_list);
 			   ),
 
-	    TP_printk("dev=%u, ring=%u, seqno=%u",
-		      __entry->dev, __entry->ring, __entry->seqno)
+	    TP_printk("dev=%u, ring=%u, seqno=%u, empty=%d",
+		      __entry->dev, __entry->ring, __entry->seqno,
+		      __entry->is_empty)
 );
 
 DEFINE_EVENT(i915_gem_request, i915_gem_request_retire,
-- 
1.9.1



[Intel-gfx] [RFC 12/39] drm/i915: Added scheduler hook when closing DRM file handles

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The scheduler decouples the submission of batch buffers to the driver from the
submission of batch buffers to the hardware. Thus it is possible for an
application to submit work, then close the DRM handle and free up all the
resources that piece of work wishes to use before the work has even been
submitted to the hardware. To prevent this, the scheduler needs to be informed
of the DRM close event so that it can force through any outstanding work
attributed to that file handle.

Change-Id: I24ac056c062b075ff1cc5e2ed2d3fa8e17e85951
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_dma.c   |  3 ++
 drivers/gpu/drm/i915/i915_scheduler.c | 66 +++
 drivers/gpu/drm/i915/i915_scheduler.h |  2 ++
 3 files changed, 71 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 5e63076..0a25017 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -47,6 +47,7 @@
 #include <linux/vga_switcheroo.h>
 #include <linux/slab.h>
 #include <acpi/video.h>
+#include "i915_scheduler.h"
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
 #include <linux/oom.h>
@@ -1186,6 +1187,8 @@ void i915_driver_lastclose(struct drm_device *dev)
 
 void i915_driver_preclose(struct drm_device *dev, struct drm_file *file)
 {
+   i915_scheduler_closefile(dev, file);
+
 	mutex_lock(&dev->struct_mutex);
 	i915_gem_context_close(dev, file);
 	i915_gem_release(dev, file);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index f7fd9a4..50bcccb 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -812,3 +812,69 @@ bool i915_scheduler_is_request_tracked(struct drm_i915_gem_request *req,
 
 	return true;
 }
+
+int i915_scheduler_closefile(struct drm_device *dev, struct drm_file *file)
+{
+	struct i915_scheduler_queue_entry	*node;
+	struct drm_i915_private			*dev_priv = dev->dev_private;
+	struct i915_scheduler			*scheduler = dev_priv->scheduler;
+	struct drm_i915_gem_request		*req;
+	struct intel_engine_cs			*ring;
+	int					i, ret;
+	unsigned long				flags;
+	bool					found;
+
+	if (!scheduler)
+		return 0;
+
+	for_each_ring(ring, dev_priv, i) {
+		do {
+			spin_lock_irqsave(&scheduler->lock, flags);
+
+			found = false;
+			list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
+				if (I915_SQS_IS_COMPLETE(node))
+					continue;
+
+				if (node->params.file != file)
+					continue;
+
+				found = true;
+				req = node->params.request;
+				i915_gem_request_reference(req);
+				break;
+			}
+
+			spin_unlock_irqrestore(&scheduler->lock, flags);
+
+			if (found) {
+				do {
+					mutex_lock(&dev->struct_mutex);
+					ret = i915_wait_request(req);
+					mutex_unlock(&dev->struct_mutex);
+					if (ret == -EAGAIN)
+						msleep(20);
+				} while (ret == -EAGAIN);
+
+				mutex_lock(&dev->struct_mutex);
+				i915_gem_request_unreference(req);
+				mutex_unlock(&dev->struct_mutex);
+			}
+		} while (found);
+	}
+
+	spin_lock_irqsave(&scheduler->lock, flags);
+	for_each_ring(ring, dev_priv, i) {
+		list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
+			if (node->params.file != file)
+				continue;
+
+			WARN_ON(!I915_SQS_IS_COMPLETE(node));
+
+			node->params.file = NULL;
+		}
+	}
+	spin_unlock_irqrestore(&scheduler->lock, flags);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 88cbfba..fbb6f7b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -86,6 +86,8 @@ enum {
 
 bool	i915_scheduler_is_enabled(struct drm_device *dev);
 int	i915_scheduler_init(struct drm_device *dev);
+int	i915_scheduler_closefile(struct drm_device *dev,
+				 struct drm_file *file);
 int i915_scheduler_queue_execbuffer(struct 

[Intel-gfx] [RFC 17/39] drm/i915: Hook scheduler node clean up into retire requests

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The scheduler keeps its own lock on various DRM objects in order to guarantee
safe access long after the original execbuff IOCTL has completed. This is
especially important when pre-emption is enabled as the batch buffer might need
to be submitted to the hardware multiple times. This patch hooks the clean up of
these locks into the request retire function. The request can only be retired
after it has completed on the hardware and thus is no longer eligible for
re-submission. Thus there is no point holding on to the locks beyond that time.

For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c   |  3 +++
 drivers/gpu/drm/i915/i915_scheduler.c | 51 ---
 drivers/gpu/drm/i915/i915_scheduler.h |  1 +
 3 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 77a3b27..cb5af5d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1405,6 +1405,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	if (!list_empty(&request->signal_list))
 		request->cancelled = true;
 
+	if (request->scheduler_qe)
+		i915_gem_scheduler_clean_node(request->scheduler_qe);
+
 	i915_gem_request_unreference(request);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index f5fa968..df2e27f 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -432,6 +432,38 @@ int i915_scheduler_handle_irq(struct intel_engine_cs *ring)
 	return 0;
 }
 
+void i915_gem_scheduler_clean_node(struct i915_scheduler_queue_entry *node)
+{
+	uint32_t i;
+
+	if (WARN_ON(!I915_SQS_IS_COMPLETE(node)))
+		return;
+
+	if (node->params.batch_obj) {
+		/* The batch buffer must be unpinned before it is unreferenced
+		 * otherwise the unpin fails with a missing vma!? */
+		if (node->params.dispatch_flags & I915_DISPATCH_SECURE)
+			i915_gem_execbuff_release_batch_obj(node->params.batch_obj);
+
+		node->params.batch_obj = NULL;
+	}
+
+	/* Release the locked buffers: */
+	for (i = 0; i < node->num_objs; i++) {
+		drm_gem_object_unreference(
+				&node->saved_objects[i].obj->base);
+	}
+	kfree(node->saved_objects);
+	node->saved_objects = NULL;
+	node->num_objs = 0;
+
+	/* Context too: */
+	if (node->params.ctx) {
+		i915_gem_context_unreference(node->params.ctx);
+		node->params.ctx = NULL;
+	}
+}
+
 static int i915_scheduler_remove(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -441,7 +473,7 @@ static int i915_scheduler_remove(struct intel_engine_cs *ring)
 	int flying = 0, queued = 0;
 	int ret = 0;
 	bool do_submit;
-	uint32_t i, min_seqno;
+	uint32_t min_seqno;
 	struct list_head remove;
 
 	if (list_empty(&scheduler->node_queue[ring->id]))
@@ -535,21 +567,8 @@ static int i915_scheduler_remove(struct intel_engine_cs *ring)
 	node = list_first_entry(&remove, typeof(*node), link);
 	list_del(&node->link);
 
-	/* The batch buffer must be unpinned before it is unreferenced
-	 * otherwise the unpin fails with a missing vma!? */
-	if (node->params.dispatch_flags & I915_DISPATCH_SECURE)
-		i915_gem_execbuff_release_batch_obj(node->params.batch_obj);
-
-	/* Release the locked buffers: */
-	for (i = 0; i < node->num_objs; i++) {
-		drm_gem_object_unreference(
-				&node->saved_objects[i].obj->base);
-	}
-	kfree(node->saved_objects);
-
-	/* Context too: */
-	if (node->params.ctx)
-		i915_gem_context_unreference(node->params.ctx);
+	/* Free up all the DRM object references */
+	i915_gem_scheduler_clean_node(node);
 
 	/* And anything else owned by the node: */
 	node->params.request->scheduler_qe = NULL;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 15878a4..73c5e7d 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -88,6 +88,7 @@ bool	i915_scheduler_is_enabled(struct drm_device *dev);
 int	i915_scheduler_init(struct drm_device *dev);
 int	i915_scheduler_closefile(struct drm_device *dev,
 				 struct drm_file *file);
+void

[Intel-gfx] [RFC 07/39] drm/i915: Start of GPU scheduler

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

Initial creation of scheduler source files. Note that this patch implements most
of the scheduler functionality but does not hook it into the driver yet. It
also leaves the scheduler code in 'pass through' mode so that even when it is
hooked in, it will not actually do very much. This allows the hooks to be added
one at a time in bite-size chunks, and only when the scheduler is finally
enabled at the end does anything start happening.

The general theory of operation is that when batch buffers are submitted to the
driver, the execbuffer() code assigns a unique request and then packages up all
the information required to execute the batch buffer at a later time. This
package is given over to the scheduler which adds it to an internal node list.
The scheduler also scans the list of objects associated with the batch buffer
and compares them against the objects already in use by other buffers in the
node list. If matches are found then the new batch buffer node is marked as
being dependent upon the matching node. The same is done for the context object.
The scheduler also bumps up the priority of such matching nodes on the grounds
that the more dependencies a given batch buffer has the more important it is
likely to be.

The scheduler aims to have a given (tuneable) number of batch buffers in flight
on the hardware at any given time. If fewer than this are currently executing
when a new node is queued, then the node is passed straight through to the
submit function. Otherwise it is simply added to the queue and the driver
returns back to user land.
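In code terms the decision point reduces to something like the fragment below
(illustrative only: the 'min_flying' tunable and the helper names are
assumptions, while the node_queue list is from this patch):

	unsigned long flags;
	bool not_flying;

	spin_lock_irqsave(&scheduler->lock, flags);
	list_add_tail(&node->link, &scheduler->node_queue[ring->id]);

	/* Below the low-water mark? Bypass the queue and submit now. */
	not_flying = i915_scheduler_count_flying(scheduler, ring) <
		     scheduler->min_flying;
	spin_unlock_irqrestore(&scheduler->lock, flags);

	if (not_flying)
		i915_scheduler_submit(ring);	/* picks the best ready node */

	return 0;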

As each batch buffer completes, it raises an interrupt which wakes up the
scheduler. Note that it is possible for multiple buffers to complete before the
IRQ handler gets to run. Further, it is possible for the seqno values to be
un-ordered (particularly once pre-emption is enabled). However, the scheduler
keeps the list of executing buffers in order of hardware submission. Thus it can
scan through the list until a matching seqno is found and then mark all in
flight nodes from that point on as completed.

A deferred work queue is also poked by the interrupt handler. When this wakes up
it can do more involved processing such as actually removing completed nodes
from the queue and freeing up the resources associated with them (internal
memory allocations, DRM object references, context reference, etc.). The work
handler also checks the in flight count and calls the submission code if a new
slot has appeared.

When the scheduler's submit code is called, it scans the queued node list for
the highest priority node that has no unmet dependencies. Note that the
dependency calculation is complex as it must take inter-ring dependencies and
potential preemptions into account. Note also that in the future this will be
extended to include external dependencies such as the Android Native Sync file
descriptors and/or the linux dma-buf synchronisation scheme.

If a suitable node is found then it is sent to execbuff_final() for submission
to the hardware. The in flight count is then re-checked and a new node popped
from the list if appropriate.

Note that this patch does not implement pre-emptive scheduling. Only basic
scheduling by re-ordering batch buffer submission is currently implemented.

Change-Id: I1e08f59e650a3c2bbaaa9de7627da33849b06106
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/Makefile |   1 +
 drivers/gpu/drm/i915/i915_drv.h   |   4 +
 drivers/gpu/drm/i915/i915_gem.c   |   5 +
 drivers/gpu/drm/i915/i915_scheduler.c | 776 ++
 drivers/gpu/drm/i915/i915_scheduler.h |  91 
 5 files changed, 877 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_scheduler.c
 create mode 100644 drivers/gpu/drm/i915/i915_scheduler.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 47a74114..c367b39 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -9,6 +9,7 @@ ccflags-y := -Werror
 # core driver code
 i915-y := i915_drv.o \
  i915_params.o \
+ i915_scheduler.o \
   i915_suspend.o \
  i915_sysfs.o \
  intel_pm.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a680778..7d2a494 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1700,6 +1700,8 @@ struct i915_execbuffer_params {
struct drm_i915_gem_request *request;
 };
 
+struct i915_scheduler;
+
 struct drm_i915_private {
struct drm_device *dev;
struct kmem_cache *objects;
@@ -1932,6 +1934,8 @@ struct drm_i915_private {
 
struct i915_runtime_pm pm;
 
+   struct i915_scheduler *scheduler;
+
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct {
int (*execbuf_submit)(struct i915_execbuffer_params *params,

[Intel-gfx] [RFC 15/39] drm/i915: Keep the reserved space mechanism happy

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

Ring space is reserved when constructing a request to ensure that the
subsequent 'add_request()' call cannot fail due to waiting for space
on a busy or broken GPU. However, the scheduler jumps into the middle
of the execbuffer process between request creation and request
submission. Thus it needs to cancel the reserved space when the
request is simply added to the scheduler's queue and not yet
submitted. Similarly, it needs to re-reserve the space when it finally
does want to send the batch buffer to the hardware.

For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  7 +++
 drivers/gpu/drm/i915/i915_scheduler.c  |  4 
 drivers/gpu/drm/i915/intel_lrc.c   | 13 +++--
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 364e9cc..75d018d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1317,6 +1317,10 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
 	/* The mutex must be acquired before calling this function */
 	BUG_ON(!mutex_is_locked(&params->dev->struct_mutex));
 
+	ret = intel_ring_reserve_space(params->request);
+	if (ret)
+		return ret;
+
 	intel_runtime_pm_get(dev_priv);
 
 	/*
@@ -1392,6 +1396,9 @@ error:
 	 */
 	intel_runtime_pm_put(dev_priv);
 
+	if (ret)
+		intel_ring_reserved_space_cancel(params->request->ringbuf);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 3494fd5..e145829 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -95,6 +95,8 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 
 	qe->scheduler_index = scheduler->index++;
 
+	intel_ring_reserved_space_cancel(qe->params.request->ringbuf);
+
 	scheduler->flags[qe->params.ring->id] |= i915_sf_submitting;
 	ret = dev_priv->gt.execbuf_final(&qe->params);
 	scheduler->flags[qe->params.ring->id] &= ~i915_sf_submitting;
@@ -126,6 +128,8 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 	node->stamp  = stamp;
 	i915_gem_request_reference(node->params.request);
 
+	intel_ring_reserved_space_cancel(node->params.request->ringbuf);
+
 	BUG_ON(node->params.request->scheduler_qe);
 	node->params.request->scheduler_qe = node;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a8c78ec..76d5023 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -908,13 +908,17 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params)
 	/* The mutex must be acquired before calling this function */
 	BUG_ON(!mutex_is_locked(&params->dev->struct_mutex));
 
+	ret = intel_logical_ring_reserve_space(params->request);
+	if (ret)
+		return ret;
+
 	/*
 	 * Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */
 	ret = logical_ring_invalidate_all_caches(params->request);
 	if (ret)
-		return ret;
+		goto err;
 
 	if (ring == dev_priv->ring[RCS] &&
 	    params->instp_mode != dev_priv->relative_constants_mode) {
@@ -938,13 +942,18 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params)
 
 	ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags);
 	if (ret)
-		return ret;
+		goto err;
 
 	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
 
 	i915_gem_execbuffer_retire_commands(params);
 
 	return 0;
+
+err:
+	intel_ring_reserved_space_cancel(params->request->ringbuf);
+
+	return ret;
 }
 
 void intel_execlists_retire_requests(struct intel_engine_cs *ring)
-- 
1.9.1



[Intel-gfx] [RFC 00/39] GPU scheduler for i915 driver

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

Implemented a batch buffer submission scheduler for the i915 DRM driver.

The general theory of operation is that when batch buffers are submitted to the
driver, the execbuffer() code assigns a unique seqno value and then packages up
all the information required to execute the batch buffer at a later time. This
package is given over to the scheduler which adds it to an internal node list.
The scheduler also scans the list of objects associated with the batch buffer
and compares them against the objects already in use by other buffers in the
node list. If matches are found then the new batch buffer node is marked as
being dependent upon the matching node. The same is done for the context object.
The scheduler also bumps up the priority of such matching nodes on the grounds
that the more dependencies a given batch buffer has the more important it is
likely to be.

The scheduler aims to have a given (tuneable) number of batch buffers in flight
on the hardware at any given time. If fewer than this are currently executing
when a new node is queued, then the node is passed straight through to the
submit function. Otherwise it is simply added to the queue and the driver
returns back to user land.

As each batch buffer completes, it raises an interrupt which wakes up the
scheduler. Note that it is possible for multiple buffers to complete before the
IRQ handler gets to run. Further, the seqno values of the individual buffers are
not necessarily incrementing as the scheduler may have re-ordered their
submission. However, the scheduler keeps the list of executing buffers in order
of hardware submission. Thus it can scan through the list until a matching seqno
is found and then mark all in flight nodes from that point on as completed.

A deferred work queue is also poked by the interrupt handler. When this wakes up
it can do more involved processing such as actually removing completed nodes
from the queue and freeing up the resources associated with them (internal
memory allocations, DRM object references, context reference, etc.). The work
handler also checks the in flight count and calls the submission code if a new
slot has appeared.

When the scheduler's submit code is called, it scans the queued node list for
the highest priority node that has no unmet dependencies. Note that the
dependency calculation is complex as it must take inter-ring dependencies and
potential preemptions into account. Note also that in the future this will be
extended to include external dependencies such as the Android Native Sync file
descriptors and/or the linux dma-buf synchronisation scheme.

If a suitable node is found then it is sent to execbuff_final() for submission
to the hardware. The in flight count is then re-checked and a new node popped
from the list if appropriate.

The scheduler also allows high priority batch buffers (e.g. from a desktop
compositor) to jump ahead of whatever is already running if the underlying
hardware supports pre-emption. In this situation, any work that was pre-empted
is returned to the queued list ready to be resubmitted when no more high
priority work is outstanding.

[Patches against drm-intel-nightly tree fetched 15/07/2015 with struct fence
conversion patches applied]

Dave Gordon (1):
  drm/i915: Updating assorted register and status page definitions

John Harrison (38):
  drm/i915: Add total count to context status debugfs output
  drm/i915: Explicit power enable during deferred context initialisation
  drm/i915: Prelude to splitting i915_gem_do_execbuffer in two
  drm/i915: Split i915_gem_do_execbuffer() in half
  drm/i915: Re-instate request->uniq because it is extremely useful
  drm/i915: Start of GPU scheduler
  drm/i915: Prepare retire_requests to handle out-of-order seqnos
  drm/i915: Added scheduler hook into i915_gem_complete_requests_ring()
  drm/i915: Disable hardware semaphores when GPU scheduler is enabled
  drm/i915: Force MMIO flips when scheduler enabled
  drm/i915: Added scheduler hook when closing DRM file handles
  drm/i915: Added deferred work handler for scheduler
  drm/i915: Redirect execbuffer_final() via scheduler
  drm/i915: Keep the reserved space mechanism happy
  drm/i915: Added tracking/locking of batch buffer objects
  drm/i915: Hook scheduler node clean up into retire requests
  drm/i915: Added scheduler interrupt handler hook
  drm/i915: Added scheduler support to __wait_request() calls
  drm/i915: Added scheduler support to page fault handler
  drm/i915: Added scheduler flush calls to ring throttle and idle functions
  drm/i915: Add scheduler hook to GPU reset
  drm/i915: Added a module parameter for allowing scheduler overrides
  drm/i915: Support for 'unflushed' ring idle
  drm/i915: Defer seqno allocation until actual hardware submission time
  drm/i915: Added immediate submission override to scheduler
  drm/i915: Add sync wait support to scheduler
  drm/i915: Connecting execbuff fences to scheduler
  drm/i915: 

[Intel-gfx] [RFC 03/39] drm/i915: Explicit power enable during deferred context initialisation

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

A later patch in this series re-organises the batch buffer submission
code. Part of that is to reduce the scope of a pm_get/put pair.
Specifically, they previously wrapped the entire submission path from
the very start to the very end, now they only wrap the actual hardware
submission part in the back half.

While that is a good thing in general, it causes a problem with the
deferred context initialisation. That is done quite early on in the
execbuf code path - it happens at context validation time rather than
context switch time. Some of the deferred work requires the power to
be enabled. Hence this patch adds an explicit power reference count to
the deferred initialisation code itself.

Change-Id: Id7b1535dfd8809a2bd5546272de2bbec39da2868
Issue: GMINL-5159
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 18dbd5c..8aa9a18 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2317,12 +2317,15 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 	WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
 	WARN_ON(ctx->engine[ring->id].state);
 
+	intel_runtime_pm_get(dev->dev_private);
+
 	context_size = round_up(get_lr_context_size(ring), 4096);
 
 	ctx_obj = i915_gem_alloc_object(dev, context_size);
 	if (!ctx_obj) {
 		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto error_pm;
 	}
 
 	if (is_global_default_ctx) {
@@ -2331,7 +2334,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 			DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
 			drm_gem_object_unreference(&ctx_obj->base);
-			return ret;
+			goto error_pm;
 		}
 	}
 
@@ -2415,6 +2418,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 		ctx->rcs_initialized = true;
 	}
 
+	intel_runtime_pm_put(dev->dev_private);
 	return 0;
 
 error:
@@ -2428,6 +2432,8 @@ error_unpin_ctx:
 	if (is_global_default_ctx)
 		i915_gem_object_ggtt_unpin(ctx_obj);
 	drm_gem_object_unreference(&ctx_obj->base);
+error_pm:
+	intel_runtime_pm_put(dev->dev_private);
 	return ret;
 }
 
-- 
1.9.1



[Intel-gfx] [RFC 05/39] drm/i915: Split i915_gem_do_execbuffer() in half

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

Split the execbuffer() function in half. The first half collects and validates
all the information required to process the batch buffer. It also does all the
object pinning, relocations, active list management, etc - basically anything
that must be done upfront before the IOCTL returns and allows the user land side
to start changing/freeing things. The second half does the actual ring
submission.

This change implements the split but leaves the back half being called directly
from the end of the front half.

Change-Id: I5e1c77639ce526ab2401b0323186c518bf13da0a
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h|  11 +++
 drivers/gpu/drm/i915/i915_gem.c|   2 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 130 -
 drivers/gpu/drm/i915/intel_lrc.c   |  58 +
 drivers/gpu/drm/i915/intel_lrc.h   |   1 +
 5 files changed, 147 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 289ddd6..28d51ac 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1684,10 +1684,18 @@ struct i915_execbuffer_params {
 	struct drm_device		*dev;
 	struct drm_file			*file;
 	uint32_t			dispatch_flags;
+	uint32_t			args_flags;
 	uint32_t			args_batch_start_offset;
+	uint32_t			args_batch_len;
+	uint32_t			args_num_cliprects;
+	uint32_t			args_DR1;
+	uint32_t			args_DR4;
 	uint32_t			batch_obj_vm_offset;
 	struct intel_engine_cs		*ring;
 	struct drm_i915_gem_object	*batch_obj;
+	struct drm_clip_rect		*cliprects;
+	uint32_t			instp_mask;
+	int				instp_mode;
 	struct intel_context		*ctx;
 	struct drm_i915_gem_request	*request;
 };
@@ -1929,6 +1937,7 @@ struct drm_i915_private {
int (*execbuf_submit)(struct i915_execbuffer_params *params,
  struct drm_i915_gem_execbuffer2 *args,
  struct list_head *vmas);
+   int (*execbuf_final)(struct i915_execbuffer_params *params);
int (*init_rings)(struct drm_device *dev);
void (*cleanup_ring)(struct intel_engine_cs *ring);
void (*stop_ring)(struct intel_engine_cs *ring);
@@ -2743,9 +2752,11 @@ int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 void i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 					struct drm_i915_gem_request *req);
 void i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params);
+void i915_gem_execbuff_release_batch_obj(struct drm_i915_gem_object *batch_obj);
 int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
 				   struct drm_i915_gem_execbuffer2 *args,
 				   struct list_head *vmas);
+int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params);
 int i915_gem_execbuffer(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int i915_gem_execbuffer2(struct drm_device *dev, void *data,
 int i915_gem_execbuffer2(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8150820..2a5667b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5481,11 +5481,13 @@ int i915_gem_init(struct drm_device *dev)
 
 	if (!i915.enable_execlists) {
 		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
+		dev_priv->gt.execbuf_final = i915_gem_ringbuffer_submission_final;
 		dev_priv->gt.init_rings = i915_gem_init_rings;
 		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
 		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
 	} else {
 		dev_priv->gt.execbuf_submit = intel_execlists_submission;
+		dev_priv->gt.execbuf_final = intel_execlists_submission_final;
 		dev_priv->gt.init_rings = intel_logical_rings_init;
 		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
 		dev_priv->gt.stop_ring = intel_logical_ring_stop;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 988ecd4..ba9d595 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1198,14 +1198,10 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
   struct drm_i915_gem_execbuffer2 *args,
   

[Intel-gfx] [RFC 20/39] drm/i915: Added scheduler support to page fault handler

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

GPU page faults can now require scheduler operation in order to complete. For
example, in order to free up sufficient memory to handle the fault the handler
must wait for a batch buffer to complete that has not even been sent to the
hardware yet. Thus EAGAIN no longer means a GPU hang; it can occur under normal
operation.
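
Illustratively, a caller that previously treated -EAGAIN as fatal would now
need retry logic along these lines (a sketch only; try_fault() is an invented
stand-in for the failing operation):

#include <errno.h>
#include <sched.h>

/* Sketch: -EAGAIN now means "yield and retry the whole operation",
 * not "the GPU is hung". */
static int fault_with_retry(int (*try_fault)(void *), void *ctx)
{
	int ret;

	for (;;) {
		ret = try_fault(ctx);
		if (ret != -EAGAIN)
			return ret;
		sched_yield();	/* give other threads a chance to run */
	}
}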

Change-Id: Iff6bd2744ef12bb7405fbcd6b43c286caad4141f
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f713cda..dd9ebbe 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1919,10 +1919,15 @@ out:
}
	case -EAGAIN:
		/*
-		 * EAGAIN means the gpu is hung and we'll wait for the error
-		 * handler to reset everything when re-faulting in
+		 * EAGAIN can mean the gpu is hung and we'll have to wait for
+		 * the error handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
+		 *
+		 * It can also indicate various other nonfatal errors for which
+		 * the best response is to give other threads a chance to run,
+		 * and then retry the failing operation in its entirety.
		 */
+		/*FALLTHRU*/
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 06/39] drm/i915: Re-instate request-uniq because it is extremely useful

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The seqno value cannot always be used when debugging issues via trace
points. This is because it can be reset back to the start, especially
during TDR type tests. Also, once the scheduler arrives, the seqno is
only valid while a given request is executing on the hardware. While
the request is simply queued waiting for submission, its seqno value
will be zero (meaning invalid).

For: VIZ-5115
Signed-off-by: John Harrison john.c.harri...@intel.com
Reviewed-by: Tomas Elf tomas@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h   |  5 +
 drivers/gpu/drm/i915/i915_gem.c   |  3 ++-
 drivers/gpu/drm/i915/i915_trace.h | 25 +
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 28d51ac..a680778 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1945,6 +1945,8 @@ struct drm_i915_private {
 
bool edp_low_vswing;
 
+   uint32_t request_uniq;
+
/*
 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 * will be rejected. Instead look for a better place.
@@ -2186,6 +2188,9 @@ struct drm_i915_gem_request {
/** GEM sequence number associated with this request. */
uint32_t seqno;
 
+	/* Unique identifier which can be used for trace points & debug */
+	uint32_t uniq;
+
/** Position in the ringbuffer of the start of the request */
u32 head;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2a5667b..0c407ae 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2828,7 +2828,7 @@ static void i915_fence_value_str(struct fence *fence, 
char *str, int size)
 
req = container_of(fence, typeof(*req), fence);
 
-	snprintf(str, size, "%d [%d]", req->fence.seqno, req->seqno);
+	snprintf(str, size, "%d [%d:%d]", req->fence.seqno, req->uniq, req->seqno);
 }
 
 static const struct fence_ops i915_gem_request_fops = {
@@ -2974,6 +2974,7 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
 
	req->i915 = dev_priv;
	req->ring = ring;
+	req->uniq = dev_priv->request_uniq++;
	req->ctx  = ctx;
	i915_gem_context_reference(req->ctx);
 
diff --git a/drivers/gpu/drm/i915/i915_trace.h 
b/drivers/gpu/drm/i915/i915_trace.h
index f455194..796c630 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -433,6 +433,7 @@ TRACE_EVENT(i915_gem_ring_sync_to,
 __field(u32, dev)
 __field(u32, sync_from)
 __field(u32, sync_to)
+__field(u32, uniq_to)
 __field(u32, seqno)
 ),
 
@@ -440,13 +441,14 @@ TRACE_EVENT(i915_gem_ring_sync_to,
			   __entry->dev = from->dev->primary->index;
			   __entry->sync_from = from->id;
			   __entry->sync_to = to_req->ring->id;
+			   __entry->uniq_to = to_req->uniq;
			   __entry->seqno = i915_gem_request_get_seqno(req);
			   ),
 
-	TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
+	TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u, to_uniq=%u",
		  __entry->dev,
		  __entry->sync_from, __entry->sync_to,
-		  __entry->seqno)
+		  __entry->seqno, __entry->uniq_to)
 );
 
 TRACE_EVENT(i915_gem_ring_dispatch,
@@ -481,6 +483,7 @@ TRACE_EVENT(i915_gem_ring_flush,
TP_STRUCT__entry(
 __field(u32, dev)
 __field(u32, ring)
+__field(u32, uniq)
 __field(u32, invalidate)
 __field(u32, flush)
 ),
@@ -488,12 +491,13 @@ TRACE_EVENT(i915_gem_ring_flush,
	TP_fast_assign(
			   __entry->dev = req->ring->dev->primary->index;
			   __entry->ring = req->ring->id;
+			   __entry->uniq = req->uniq;
			   __entry->invalidate = invalidate;
			   __entry->flush = flush;
			   ),
 
-	TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x",
-		  __entry->dev, __entry->ring,
+	TP_printk("dev=%u, ring=%x, request=%u, invalidate=%04x, flush=%04x",
+		  __entry->dev, __entry->ring, __entry->uniq,
		  __entry->invalidate, __entry->flush)
 );
 
@@ -504,6 +508,7 @@ DECLARE_EVENT_CLASS(i915_gem_request,
TP_STRUCT__entry(
 __field(u32, dev)
 __field(u32, ring)
+__field(u32, uniq)
 

[Intel-gfx] [RFC 13/39] drm/i915: Added deferred work handler for scheduler

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The scheduler needs to do interrupt triggered work that is too complex to do in
the interrupt handler. Thus it requires a deferred work handler to process this
work asynchronously.
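
The underlying pattern is the standard top/bottom-half split. A minimal
sketch under invented names follows (the real hook-up is in the diff below;
it assumes INIT_WORK() was done on the work item during driver load):

#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

/* Sketch of the IRQ/work split: the interrupt path may not sleep, so it
 * only records completions under a spinlock and kicks a work item; the
 * work handler then runs in process context where mutexes may be taken. */
struct demo_sched {
	spinlock_t lock;
	struct work_struct work;	/* INIT_WORK(&work, demo_work_handler) at init */
};

static void demo_work_handler(struct work_struct *work)
{
	struct demo_sched *sched =
		container_of(work, struct demo_sched, work);

	/* Safe to sleep here: take struct_mutex-style locks, free objects. */
	(void)sched;
}

static void demo_irq_notify(struct demo_sched *sched)
{
	unsigned long flags;

	spin_lock_irqsave(&sched->lock, flags);
	/* ... mark completed nodes under the spinlock ... */
	spin_unlock_irqrestore(&sched->lock, flags);

	schedule_work(&sched->work);	/* defer the heavy lifting */
}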

Change-Id: I0f7cc2b6f034a50bf8f7e368b60ad8bafd00f993
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_dma.c   |  3 +++
 drivers/gpu/drm/i915/i915_drv.h   | 10 ++
 drivers/gpu/drm/i915/i915_gem.c   |  2 ++
 drivers/gpu/drm/i915/i915_scheduler.c | 23 +--
 drivers/gpu/drm/i915/i915_scheduler.h |  1 +
 5 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0a25017..4d3370f 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1084,6 +1084,9 @@ int i915_driver_unload(struct drm_device *dev)
	WARN_ON(unregister_oom_notifier(&dev_priv->mm.oom_notifier));
	unregister_shrinker(&dev_priv->mm.shrinker);
 
+	/* Cancel the scheduler work handler, which should be idle now. */
+	cancel_work_sync(&dev_priv->mm.scheduler_work);
+
	io_mapping_free(dev_priv->gtt.mappable);
	arch_phys_wc_del(dev_priv->gtt.mtrr);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 58f53ec..2b3fab6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1299,6 +1299,16 @@ struct i915_gem_mm {
struct delayed_work retire_work;
 
/**
+	 * New scheme is to get an interrupt after every work packet
+	 * in order to allow the low latency scheduling of pending
+	 * packets. The idea behind adding new packets to a pending
+	 * queue rather than directly into the hardware ring buffer
+	 * is to allow high priority packets to overtake low priority
+	 * ones.
+	 */
+	struct work_struct scheduler_work;
+
+   /**
 * When we detect an idle GPU, we want to turn on
 * powersaving features. So once we see that there
 * are no more requests outstanding and no more
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e3c4032..77a3b27 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5631,6 +5631,8 @@ i915_gem_load(struct drm_device *dev)
			  i915_gem_retire_work_handler);
	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
			  i915_gem_idle_work_handler);
+	INIT_WORK(&dev_priv->mm.scheduler_work,
+		  i915_gem_scheduler_work_handler);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
 
	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
b/drivers/gpu/drm/i915/i915_scheduler.c
index 50bcccb..3494fd5 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -407,12 +407,12 @@ int i915_scheduler_handle_irq(struct intel_engine_cs *ring)
	i915_scheduler_seqno_complete(ring, seqno);
	spin_unlock_irqrestore(&scheduler->lock, flags);
 
-	/* XXX: Need to also call i915_scheduler_remove() via work handler. */
+	queue_work(dev_priv->wq, &dev_priv->mm.scheduler_work);
 
	return 0;
 }
 
-int i915_scheduler_remove(struct intel_engine_cs *ring)
+static int i915_scheduler_remove(struct intel_engine_cs *ring)
 {
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	struct i915_scheduler   *scheduler = dev_priv->scheduler;
@@ -531,6 +531,25 @@ int i915_scheduler_remove(struct intel_engine_cs *ring)
return ret;
 }
 
+void i915_gem_scheduler_work_handler(struct work_struct *work)
+{
+   struct intel_engine_cs  *ring;
+   struct drm_i915_private *dev_priv;
+   struct drm_device   *dev;
+   int i;
+
+	dev_priv = container_of(work, struct drm_i915_private, mm.scheduler_work);
+	dev = dev_priv->dev;
+
+	mutex_lock(&dev->struct_mutex);
+
+	for_each_ring(ring, dev_priv, i) {
+		i915_scheduler_remove(ring);
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+}
+
 static void i915_scheduler_priority_bump_clear(struct i915_scheduler *scheduler)
 {
struct i915_scheduler_queue_entry *node;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h 
b/drivers/gpu/drm/i915/i915_scheduler.h
index fbb6f7b..15878a4 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -90,6 +90,7 @@ int i915_scheduler_closefile(struct drm_device *dev,
				 struct drm_file *file);
 int         i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
 int         i915_scheduler_handle_irq(struct intel_engine_cs *ring);
+void        i915_gem_scheduler_work_handler(struct work_struct *work);
 bool

[Intel-gfx] [RFC 26/39] drm/i915: Added immediate submission override to scheduler

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

To aid with debugging issues related to the scheduler, it can be useful to
ensure that all batch buffers are submitted immediately rather than queued until
later. This change adds an override flag via the module parameter to force
instant submission.
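
For reference, with the enum value added below this mode would be selected by
booting with something like i915.scheduler_override=2 (bit 1, i.e.
i915_so_submit_on_queue) - assuming the scheduler_override parameter is
exposed through i915_params in the same way as the other i915.* module
options earlier in the series.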

Change-Id: I7652df53e2d3c3d77d78bebcf99856e2c53f2801
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_scheduler.c | 7 +--
 drivers/gpu/drm/i915/i915_scheduler.h | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
b/drivers/gpu/drm/i915/i915_scheduler.c
index 224c8b4..c7139f8 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -238,8 +238,11 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 
	list_add_tail(&node->link, &scheduler->node_queue[ring->id]);
 
-	not_flying = i915_scheduler_count_flying(scheduler, ring) <
-		     scheduler->min_flying;
+	if (i915.scheduler_override & i915_so_submit_on_queue)
+		not_flying = true;
+	else
+		not_flying = i915_scheduler_count_flying(scheduler, ring) <
+			     scheduler->min_flying;
 
	spin_unlock_irqrestore(&scheduler->lock, flags);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h 
b/drivers/gpu/drm/i915/i915_scheduler.h
index 7d743c9..ce94b0b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -92,6 +92,7 @@ enum {
 /* Options for 'scheduler_override' module parameter: */
 enum {
	i915_so_direct_submit   = (1 << 0),
+	i915_so_submit_on_queue = (1 << 1),
 };
 
 booli915_scheduler_is_enabled(struct drm_device *dev);
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 10/39] drm/i915: Disable hardware semaphores when GPU scheduler is enabled

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

Hardware semaphores require seqno values to be continuously incrementing.
However, the scheduler's reordering of batch buffers means that the seqno values
going through the hardware could be out of order. Thus semaphores cannot be
used.

On the other hand, the scheduler supersedes the need for hardware semaphores
anyway. Having one ring stall waiting for something to complete on another ring
is inefficient if that ring could be working on some other, independent task.
This is what the scheduler is meant to do - keep the hardware as busy as
possible by reordering batch buffers to avoid dependency stalls.

Change-Id: I95d1fceacd370455a9720d7dca55cfd0a1f6beaa
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.c | 9 +
 drivers/gpu/drm/i915/i915_scheduler.c   | 7 +++
 drivers/gpu/drm/i915/i915_scheduler.h   | 1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c | 4 
 4 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index db48aee..abd7efc 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -34,6 +34,7 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "i915_scheduler.h"
 
 #include <linux/console.h>
 #include <linux/module.h>
@@ -516,6 +517,14 @@ void intel_detect_pch(struct drm_device *dev)
 
 bool i915_semaphore_is_enabled(struct drm_device *dev)
 {
+	/* Hardware semaphores are not compatible with the scheduler due to the
+	 * seqno values being potentially out of order. However, semaphores are
+	 * also not required as the scheduler will handle inter-ring dependencies
+	 * and try to do so in a way that does not cause dead time on the hardware.
+	 */
+	if (i915_scheduler_is_enabled(dev))
+		return false;
+
	if (INTEL_INFO(dev)->gen < 6)
		return false;
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
b/drivers/gpu/drm/i915/i915_scheduler.c
index 0d1cbe3..f7fd9a4 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -38,6 +38,13 @@ static int i915_scheduler_priority_bump(struct i915_scheduler *scheduler,
					struct i915_scheduler_queue_entry *target,
					uint32_t bump);
 
+bool i915_scheduler_is_enabled(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	return dev_priv->scheduler != NULL;
+}
+
 int i915_scheduler_init(struct drm_device *dev)
 {
	struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h 
b/drivers/gpu/drm/i915/i915_scheduler.h
index 6b2585a..88cbfba 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -84,6 +84,7 @@ enum {
i915_sf_submitting  = (1  1),
 };
 
+bool        i915_scheduler_is_enabled(struct drm_device *dev);
 int         i915_scheduler_init(struct drm_device *dev);
 int         i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
 int         i915_scheduler_handle_irq(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 83a5254..df0cd48 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -32,6 +32,7 @@
 #include <drm/i915_drm.h>
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "i915_scheduler.h"
 
 bool
 intel_ring_initialized(struct intel_engine_cs *ring)
@@ -1379,6 +1380,9 @@ gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
	u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
	int ret;
 
+	/* Arithmetic on sequence numbers is unreliable with a scheduler. */
+	BUG_ON(i915_scheduler_is_enabled(signaller->dev));
+
	/* Throughout all of the GEM code, seqno passed implies our current
	 * seqno is >= the last seqno executed. However for hardware the
	 * comparison is strictly greater than.
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 27/39] drm/i915: Add sync wait support to scheduler

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

There is a sync framework to allow work for multiple independent
systems to be synchronised with each other but without stalling
the CPU whether in the application or the driver. This patch adds
support for this framework to the GPU scheduler.

Batch buffers can now have sync framework fence objects associated with
them. The scheduler will look at this fence when deciding what to
submit next to the hardware. If the fence is outstanding then that
batch buffer will be passed over in preference of one that is ready to
run. If no other batches are ready then the scheduler will queue an
asynchronous callback to be woken up when the fence has been
signalled. The callback will wake the scheduler and submit the now
ready batch buffer.
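
The selection policy described above amounts to something like the following
sketch (illustrative only; the node layout and the helpers
fence_is_signalled()/register_async_wake() are invented stand-ins for the
sync framework plumbing):

#include <linux/list.h>

struct sync_fence;

struct demo_node {
	struct list_head link;
	struct sync_fence *fence;	/* NULL if nothing to wait for */
};

extern bool fence_is_signalled(struct sync_fence *fence);	/* invented */
extern void register_async_wake(struct sync_fence *fence);	/* invented */

static struct demo_node *pick_next(struct list_head *queue)
{
	struct demo_node *node, *blocked = NULL;

	list_for_each_entry(node, queue, link) {
		if (!node->fence || fence_is_signalled(node->fence))
			return node;	/* ready to run right now */
		blocked = node;		/* remember a fence-blocked node */
	}

	/* Nothing ready: arrange to be woken when the fence signals,
	 * rather than stalling the CPU here. */
	if (blocked)
		register_async_wake(blocked->fence);

	return NULL;
}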

For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h   |   1 +
 drivers/gpu/drm/i915/i915_scheduler.c | 163 --
 drivers/gpu/drm/i915/i915_scheduler.h |   6 ++
 3 files changed, 165 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 12b4986..b568432 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1703,6 +1703,7 @@ struct i915_execbuffer_params {
uint32_tbatch_obj_vm_offset;
struct intel_engine_cs  *ring;
struct drm_i915_gem_object  *batch_obj;
+   struct sync_fence   *fence_wait;
struct drm_clip_rect*cliprects;
uint32_tinstp_mask;
int instp_mode;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
b/drivers/gpu/drm/i915/i915_scheduler.c
index c7139f8..19577c9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -25,6 +25,7 @@
 #include i915_drv.h
 #include intel_drv.h
 #include i915_scheduler.h
+#include "../drivers/staging/android/sync.h"
 
 static int i915_scheduler_fly_node(struct i915_scheduler_queue_entry *node);
 static int i915_scheduler_remove_dependent(struct i915_scheduler *scheduler,
@@ -100,6 +101,9 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 
	qe->scheduler_index = scheduler->index++;
 
+	WARN_ON(qe->params.fence_wait &&
+		(atomic_read(&qe->params.fence_wait->status) == 0));
+
	intel_ring_reserved_space_cancel(qe->params.request->ringbuf);
 
	scheduler->flags[qe->params.ring->id] |= i915_sf_submitting;
@@ -134,6 +138,11 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
		if (qe->params.dispatch_flags & I915_DISPATCH_SECURE)
			i915_gem_execbuff_release_batch_obj(qe->params.batch_obj);
 
+#ifdef CONFIG_SYNC
+		if (qe->params.fence_wait)
+			sync_fence_put(qe->params.fence_wait);
+#endif
+
		return 0;
	}
 
 
@@ -625,6 +634,11 @@ static int i915_scheduler_remove(struct intel_engine_cs *ring)
		node = list_first_entry(&remove, typeof(*node), link);
		list_del(&node->link);
 
+#ifdef CONFIG_SYNC
+		if (node->params.fence_wait)
+			sync_fence_put(node->params.fence_wait);
+#endif
+
		/* Free up all the DRM object references */
		i915_gem_scheduler_clean_node(node);
 
@@ -845,17 +859,100 @@ static int i915_scheduler_submit_max_priority(struct intel_engine_cs *ring,
	return count;
 }
 
+#ifdef CONFIG_SYNC
+/* Use a private structure in order to pass the 'dev' pointer through */
+struct i915_sync_fence_waiter {
+	struct sync_fence_waiter sfw;
+	struct drm_device	*dev;
+	struct i915_scheduler_queue_entry *node;
+};
+
+static void i915_scheduler_wait_fence_signaled(struct sync_fence *fence,
+					       struct sync_fence_waiter *waiter)
+{
+	struct i915_sync_fence_waiter *i915_waiter;
+	struct drm_i915_private *dev_priv = NULL;
+
+	i915_waiter = container_of(waiter, struct i915_sync_fence_waiter, sfw);
+	dev_priv    = (i915_waiter && i915_waiter->dev) ?
+				      i915_waiter->dev->dev_private : NULL;
+
+	/*
+	 * NB: The callback is executed at interrupt time, thus it can not
+	 * call _submit() directly. It must go via the delayed work handler.
+	 */
+	if (dev_priv) {
+		struct i915_scheduler   *scheduler;
+		unsigned long           flags;
+
+		scheduler = dev_priv->scheduler;
+
+		spin_lock_irqsave(&scheduler->lock, flags);
+		i915_waiter->node->flags &= ~i915_qef_fence_waiting;
+		spin_unlock_irqrestore(&scheduler->lock, flags);
+
+		queue_work(dev_priv->wq, &dev_priv->mm.scheduler_work);
+	}
+
+   

[Intel-gfx] [RFC 32/39] drm/i915: Added debug state dump facilities to scheduler

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

When debugging batch buffer submission issues, it is useful to be able to see
what the current state of the scheduler is. This change adds functions for
decoding the internal scheduler state and reporting it.

Change-Id: I0634168e3f3465ff023f5a673165c90b07e535b6
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_scheduler.c | 276 ++
 drivers/gpu/drm/i915/i915_scheduler.h |  14 ++
 2 files changed, 290 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
b/drivers/gpu/drm/i915/i915_scheduler.c
index f0c99ad..e22f6b8 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -36,6 +36,9 @@ static int i915_scheduler_submit_max_priority(struct intel_engine_cs *ring,
						      bool is_locked);
 static uint32_t    i915_scheduler_count_flying(struct i915_scheduler *scheduler,
					       struct intel_engine_cs *ring);
+static int         i915_scheduler_dump_locked(struct intel_engine_cs *ring,
+					      const char *msg);
+static int         i915_scheduler_dump_all_locked(struct drm_device *dev,
+						  const char *msg);
 static void        i915_scheduler_priority_bump_clear(struct i915_scheduler *scheduler);
 static int         i915_scheduler_priority_bump(struct i915_scheduler *scheduler,
						struct i915_scheduler_queue_entry *target,
						uint32_t bump);
@@ -53,6 +56,115 @@ bool i915_scheduler_is_enabled(struct drm_device *dev)
	return dev_priv->scheduler != NULL;
 }
 
+const char *i915_qe_state_str(struct i915_scheduler_queue_entry *node)
+{
+   static char str[50];
+	char	*ptr = str;
+
+	*(ptr++) = node->bumped ? 'B' : '-',
+
+   *ptr = 0;
+
+   return str;
+}
+
+char i915_scheduler_queue_status_chr(enum i915_scheduler_queue_status status)
+{
+   switch (status) {
+   case i915_sqs_none:
+   return 'N';
+
+   case i915_sqs_queued:
+   return 'Q';
+
+   case i915_sqs_popped:
+   return 'X';
+
+   case i915_sqs_flying:
+   return 'F';
+
+   case i915_sqs_complete:
+   return 'C';
+
+   case i915_sqs_dead:
+   return 'D';
+
+   default:
+   break;
+   }
+
+   return '?';
+}
+
+const char *i915_scheduler_queue_status_str(
+   enum i915_scheduler_queue_status status)
+{
+   static char str[50];
+
+   switch (status) {
+	case i915_sqs_none:
+		return "None";
+
+	case i915_sqs_queued:
+		return "Queued";
+
+	case i915_sqs_popped:
+		return "Popped";
+
+	case i915_sqs_flying:
+		return "Flying";
+
+	case i915_sqs_complete:
+		return "Complete";
+
+	case i915_sqs_dead:
+		return "Dead";
+
+	default:
+		break;
+	}
+
+	sprintf(str, "[Unknown_%d!]", status);
+   return str;
+}
+
+const char *i915_scheduler_flag_str(uint32_t flags)
+{
+	static char str[100];
+	char       *ptr = str;
+
+	*ptr = 0;
+
+#define TEST_FLAG(flag, msg)						\
+	do {								\
+		if (flags & (flag)) {					\
+			strcpy(ptr, msg);				\
+			ptr += strlen(ptr);				\
+			flags &= ~(flag);				\
+		}							\
+	} while (0)
+
+	TEST_FLAG(i915_sf_interrupts_enabled, "IntOn|");
+	TEST_FLAG(i915_sf_submitting,         "Submitting|");
+	TEST_FLAG(i915_sf_dump_force,         "DumpForce|");
+	TEST_FLAG(i915_sf_dump_details,       "DumpDetails|");
+	TEST_FLAG(i915_sf_dump_dependencies,  "DumpDeps|");
+
+#undef TEST_FLAG
+
+	if (flags) {
+		sprintf(ptr, "Unknown_0x%X!", flags);
+		ptr += strlen(ptr);
+	}
+
+	if (ptr == str)
+		strcpy(str, "-");
+	else
+		ptr[-1] = 0;
+
+	return str;
+}
+
 int i915_scheduler_init(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev-dev_private;
@@ -709,6 +821,170 @@ void i915_gem_scheduler_work_handler(struct work_struct *work)
	mutex_unlock(&dev->struct_mutex);
 }
 
+int i915_scheduler_dump_all(struct drm_device *dev, const char *msg)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_scheduler   *scheduler = dev_priv->scheduler;
+	unsigned long   flags;
+	int ret;
+
+	spin_lock_irqsave(&scheduler->lock, flags);
+	ret = i915_scheduler_dump_all_locked(dev, msg);
+	spin_unlock_irqrestore(&scheduler->lock, flags);
+
+	return ret;
+}
+

[Intel-gfx] [RFC 24/39] drm/i915: Support for 'unflushed' ring idle

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

When the seqno wraps around zero, the entire GPU is forced to be idle
for some reason (possibly only to work around issues with hardware
semaphores but no-one seems too sure!). This causes a problem if the
force idle occurs at an inopportune moment such as in the middle of
submitting a batch buffer. Specifically, it would lead to recursive
submits - submitting work requires a new seqno, the new seqno requires
idling the ring, idling the ring requires submitting work, submitting
work requires a new seqno...

This change adds a 'flush' parameter to the idle function call which
specifies whether the scheduler queues should be flushed out. I.e. is
the call intended to just idle the ring as it is right now (no flush)
or is it intended to force all outstanding work out of the system
(with flush).

In the seqno wrap case, pending work is not an issue because the next
operation will be to submit it. However, in other cases, the intention
is to make sure everything that could be done has been done.

Change-Id: I182e9a5853666c64ecc9e84d8a8b820a7f8e8836
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c |  4 ++--
 drivers/gpu/drm/i915/intel_lrc.c|  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 17 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6d72caa..20c696f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2474,7 +2474,7 @@ i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
 
/* Carefully retire all requests without writing to the rings */
for_each_ring(ring, dev_priv, i) {
-   ret = intel_ring_idle(ring);
+   ret = intel_ring_idle(ring, false);
if (ret)
return ret;
}
@@ -3732,7 +3732,7 @@ int i915_gpu_idle(struct drm_device *dev)
i915_add_request_no_flush(req);
}
 
-   ret = intel_ring_idle(ring);
+   ret = intel_ring_idle(ring, true);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 76d5023..a811d0b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -990,7 +990,7 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
if (!intel_ring_initialized(ring))
return;
 
-   ret = intel_ring_idle(ring);
+   ret = intel_ring_idle(ring, true);
	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
			  ring->name, ret);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e0992b7..afb04de 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2177,9 +2177,22 @@ static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
	intel_ring_update_space(ringbuf);
 }
 
-int intel_ring_idle(struct intel_engine_cs *ring)
+int intel_ring_idle(struct intel_engine_cs *ring, bool flush)
 {
struct drm_i915_gem_request *req;
+   int ret;
+
+   /*
+* NB: Must not flush the scheduler if this idle request is from
+* within an execbuff submission (i.e. due to 'get_seqno' calling
+* 'wrap_seqno' calling 'idle'). As that would lead to recursive
+* flushes!
+*/
+   if (flush) {
+   ret = i915_scheduler_flush(ring, true);
+   if (ret)
+   return ret;
+   }
 
/* Wait upon the last request to be completed */
	if (list_empty(&ring->request_list))
@@ -2983,7 +2996,7 @@ intel_stop_ring_buffer(struct intel_engine_cs *ring)
if (!intel_ring_initialized(ring))
return;
 
-   ret = intel_ring_idle(ring);
+   ret = intel_ring_idle(ring, true);
	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
			  ring->name, ret);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 9457774..2f30900 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -487,7 +487,7 @@ void intel_ring_update_space(struct intel_ringbuffer 
*ringbuf);
 int intel_ring_space(struct intel_ringbuffer *ringbuf);
 bool intel_ring_stopped(struct intel_engine_cs *ring);
 
-int __must_check intel_ring_idle(struct intel_engine_cs *ring);
+int __must_check intel_ring_idle(struct intel_engine_cs *ring, bool flush);
 void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
 int 

[Intel-gfx] [RFC 36/39] drm/i915: Add scheduler support functions for TDR

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

Change-Id: I720463f01c4edd3579ce52e315a85e4d7874d7e5
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_scheduler.c | 31 +++
 drivers/gpu/drm/i915/i915_scheduler.h |  1 +
 2 files changed, 32 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
b/drivers/gpu/drm/i915/i915_scheduler.c
index 7be1c89..631f4e6 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -1693,6 +1693,37 @@ int i915_scheduler_closefile(struct drm_device *dev, 
struct drm_file *file)
return 0;
 }
 
+/*
+ * Used by TDR to distinguish hung rings (not moving but with work to do)
+ * from idle rings (not moving because there is nothing to do).
+ */
+bool i915_scheduler_is_ring_flying(struct intel_engine_cs *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct i915_scheduler   *scheduler = dev_priv->scheduler;
+	struct i915_scheduler_queue_entry *node;
+	unsigned long   flags;
+	bool            found = false;
+
+	/* With the scheduler in bypass mode, no information can be returned. */
+	if (i915.scheduler_override & i915_so_direct_submit) {
+		return true;
+	}
+
+	spin_lock_irqsave(&scheduler->lock, flags);
+
+	list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
+		if (I915_SQS_IS_FLYING(node)) {
+			found = true;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&scheduler->lock, flags);
+
+   return found;
+}
+
 bool i915_scheduler_file_queue_is_full(struct drm_file *file)
 {
	struct drm_i915_file_private *file_priv = file->driver_priv;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h 
b/drivers/gpu/drm/i915/i915_scheduler.h
index 6e6e3a0..2113e7d 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -154,6 +154,7 @@ int i915_scheduler_closefile(struct drm_device *dev,
 void        i915_gem_scheduler_clean_node(struct i915_scheduler_queue_entry *node);
 int         i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
 int         i915_scheduler_handle_irq(struct intel_engine_cs *ring);
+bool        i915_scheduler_is_ring_flying(struct intel_engine_cs *ring);
 void        i915_scheduler_kill_all(struct drm_device *dev);
 void        i915_gem_scheduler_work_handler(struct work_struct *work);
 #ifdef CONFIG_SYNC
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 33/39] drm/i915: Add early exit to execbuff_final() if insufficient ring space

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

One of the major purposes of the GPU scheduler is to avoid stalling the CPU when
the GPU is busy and unable to accept more work. This change adds support to the
ring submission code to allow a ring space check to be performed before
attempting to submit a batch buffer to the hardware. If insufficient space is
available then the scheduler can go away and come back later, letting the CPU
get on with other work, rather than stalling and waiting for the hardware to
catch up.
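
The core of the idea is a worst-case space check before any commands are
emitted. A self-contained sketch under invented names (the real check and
constants appear in the diff below):

#include <errno.h>
#include <stdint.h>

#define DEMO_MAX_CMD_DWORDS 256	/* deliberate overestimate */

struct demo_ring { uint32_t free_bytes; };

/* Sketch: verify up front that the worst-case command sequence fits, so
 * an early exit can never leave a partial sequence in the ring. Doubled
 * because the block may not wrap across the end of the ring buffer. */
static int demo_try_submit(struct demo_ring *ring)
{
	if (ring->free_bytes < DEMO_MAX_CMD_DWORDS * 2 * sizeof(uint32_t))
		return -EAGAIN;	/* scheduler retries later, CPU moves on */

	/* ... safe to emit up to DEMO_MAX_CMD_DWORDS dwords ... */
	return 0;
}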

Change-Id: I267159ce1150cb6714d34a49b841bcbe4bf66326
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 42 --
 drivers/gpu/drm/i915/intel_lrc.c   | 57 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.c| 24 +
 drivers/gpu/drm/i915/intel_ringbuffer.h|  1 +
 4 files changed, 109 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c2a69d8..b701838 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1078,25 +1078,19 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
 {
	struct intel_engine_cs *ring = req->ring;
	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret, i;
+	int i;
 
	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
		DRM_DEBUG("sol reset is gen7/rcs only\n");
		return -EINVAL;
	}
 
-	ret = intel_ring_begin(req, 4 * 3);
-	if (ret)
-		return ret;
-
	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}
 
-	intel_ring_advance(ring);
-
	return 0;
 }
 
@@ -1315,6 +1309,7 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
	struct intel_engine_cs  *ring = params->ring;
	u64 exec_start, exec_len;
	int ret, i;
+	uint32_t min_space;
 
	/* The mutex must be acquired before calling this function */
	BUG_ON(!mutex_is_locked(&params->dev->struct_mutex));
@@ -1336,8 +1331,36 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
	if (ret)
		return ret;
 
+	/*
+	 * It would be a bad idea to run out of space while writing commands
+	 * to the ring. One of the major aims of the scheduler is to not stall
+	 * at any point for any reason. However, doing an early exit half way
+	 * through submission could result in a partial sequence being written
+	 * which would leave the engine in an unknown state. Therefore, check in
+	 * advance that there will be enough space for the entire submission
+	 * whether emitted by the code below OR by any other functions that may
+	 * be executed before the end of final().
+	 *
+	 * NB: This test deliberately overestimates, because that's easier than
+	 * tracing every potential path that could be taken!
+	 *
+	 * Current measurements suggest that we may need to emit up to 744 bytes
+	 * (186 dwords), so this is rounded up to 256 dwords here. Then we double
+	 * that to get the free space requirement, because the block isn't allowed
+	 * to span the transition from the end to the beginning of the ring.
+	 */
+#define I915_BATCH_EXEC_MAX_LEN 256	/* max dwords emitted here */
+	min_space = I915_BATCH_EXEC_MAX_LEN * 2 * sizeof(uint32_t);
+	ret = intel_ring_test_space(params->request->ringbuf, min_space);
+	if (ret)
+		goto early_error;
+
	intel_runtime_pm_get(dev_priv);
 
+	ret = intel_ring_begin(params->request, I915_BATCH_EXEC_MAX_LEN);
+	if (ret)
+		goto error;
+
	/*
	 * Unconditionally invalidate gpu caches and ensure that we do flush
	 * any residual writes from the previous batch.
@@ -1356,10 +1379,6 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
 
	if (ring == &dev_priv->ring[RCS] &&
	    params->instp_mode != dev_priv->relative_constants_mode) {
-		ret = intel_ring_begin(params->request, 4);
-		if (ret)
-			goto error;
-
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
@@ -1411,6 +1430,7 @@ error:
	 */
	intel_runtime_pm_put(dev_priv);
 
+early_error:
	if (ret)
		intel_ring_reserved_space_cancel(params->request->ringbuf);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 41dca2a..3cedddb 100644
--- 

[Intel-gfx] [RFC 08/39] drm/i915: Prepare retire_requests to handle out-of-order seqnos

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

A major point of the GPU scheduler is that it re-orders batch buffers after they
have been submitted to the driver. This leads to requests completing out of
order. In turn, this means that the retire processing can no longer assume that
all completed entries are at the front of the list. Rather than attempting to
re-order the request list on a regular basis, it is better to simply scan the
entire list.

There is also a problem with freeing a request before its objects are moved to
the inactive list. Thus the requests are now moved to a temporary list first,
then the objects de-activated and finally the requests on the temporary list are
freed.
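
The resulting retire loop is essentially a two-pass scan. A sketch with
invented stand-ins req_completed()/req_free() (the real version is in the
diff below):

#include <linux/list.h>

struct demo_req { struct list_head link; };

extern bool req_completed(struct demo_req *req);	/* invented */
extern void req_free(struct demo_req *req);		/* invented */

static void demo_retire_all(struct list_head *requests)
{
	struct demo_req *req, *next;
	LIST_HEAD(done);

	/* Pass 1: completed entries may be anywhere in the list, so scan
	 * everything and park completions on a local list. */
	list_for_each_entry_safe(req, next, requests, link)
		if (req_completed(req))
			list_move_tail(&req->link, &done);

	/* ... de-activate objects while the requests still exist ... */

	/* Pass 2: only now is it safe to free the parked requests. */
	while (!list_empty(&done)) {
		req = list_first_entry(&done, struct demo_req, link);
		list_del(&req->link);
		req_free(req);
	}
}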

Change-Id: I7eb6793581d9d28eb832e0e94c116b7202fa1b26
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c | 54 +++--
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3fbc6ec..56405cd 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3171,6 +3171,10 @@ void i915_gem_reset(struct drm_device *dev)
 void
 i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 {
+   struct drm_i915_gem_object *obj, *obj_next;
+   struct drm_i915_gem_request *req, *req_next;
+   LIST_HEAD(deferred_request_free);
+
	WARN_ON(i915_verify_lists(ring->dev));
 
/*
@@ -3180,37 +3184,31 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
*ring)
 */
i915_gem_request_notify(ring);
 
+   /*
+* Note that request entries might be out of order due to rescheduling
+* and pre-emption. Thus both lists must be processed in their entirety
+* rather than stopping at the first non-complete entry.
+*/
+
/* Retire requests first as we use it above for the early return.
 * If we retire requests last, we may use a later seqno and so clear
 * the requests lists without clearing the active list, leading to
 * confusion.
 */
-	while (!list_empty(&ring->request_list)) {
-		struct drm_i915_gem_request *request;
-
-		request = list_first_entry(&ring->request_list,
-					   struct drm_i915_gem_request,
-					   list);
-
-		if (!i915_gem_request_completed(request))
-			break;
+	list_for_each_entry_safe(req, req_next, &ring->request_list, list) {
+		if (!i915_gem_request_completed(req))
+			continue;
 
-		i915_gem_request_retire(request);
+		list_move_tail(&req->list, &deferred_request_free);
	}
 
/* Move any buffers on the active list that are no longer referenced
 * by the ringbuffer to the flushing/inactive lists as appropriate,
 * before we free the context associated with the requests.
 */
-	while (!list_empty(&ring->active_list)) {
-		struct drm_i915_gem_object *obj;
-
-		obj = list_first_entry(&ring->active_list,
-				       struct drm_i915_gem_object,
-				       ring_list[ring->id]);
-
+	list_for_each_entry_safe(obj, obj_next, &ring->active_list,
+				 ring_list[ring->id]) {
		if (!list_empty(&obj->last_read_req[ring->id]->list))
-			break;
+			continue;
 
		i915_gem_object_retire__read(obj, ring->id);
	}
@@ -3222,18 +3220,26 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
}
 
	while (!list_empty(&ring->delayed_free_list)) {
-		struct drm_i915_gem_request *request;
		unsigned long flags;
 
-		request = list_first_entry(&ring->delayed_free_list,
-					   struct drm_i915_gem_request,
-					   delay_free_list);
+		req = list_first_entry(&ring->delayed_free_list,
+				       struct drm_i915_gem_request,
+				       delay_free_list);
 
		spin_lock_irqsave(&ring->delayed_free_lock, flags);
-		list_del(&request->delay_free_list);
+		list_del(&req->delay_free_list);
		spin_unlock_irqrestore(&ring->delayed_free_lock, flags);
 
-		i915_gem_request_free(request);
+		i915_gem_request_free(req);
+	}
+
+	/* It should now be safe to actually free the requests */
+	while (!list_empty(&deferred_request_free)) {
+		req = list_first_entry(&deferred_request_free,
+				       struct drm_i915_gem_request,
+				       list);
+
+		i915_gem_request_retire(req);
	}
 
	WARN_ON(i915_verify_lists(ring->dev));
-- 
1.9.1


[Intel-gfx] [RFC 34/39] drm/i915: Added scheduler statistic reporting to debugfs

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

It is useful to know what the scheduler is doing, for both debugging and
performance analysis purposes. This change adds a bunch of counters and such
that keep track of various scheduler operations (batches submitted, completed,
flush requests, etc.). The data can then be read in userland via the debugfs
mechanism.

Change-Id: I3266c631cd70c9eeb2c235f88f493e60462f85d7
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c| 76 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 -
 drivers/gpu/drm/i915/i915_scheduler.c  | 71 ++--
 drivers/gpu/drm/i915/i915_scheduler.h  | 35 ++
 4 files changed, 189 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 028fa8f..3c5c750 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3258,6 +3258,81 @@ static int i915_drrs_status(struct seq_file *m, void 
*unused)
return 0;
 }
 
+static int i915_scheduler_info(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_scheduler   *scheduler = dev_priv->scheduler;
+	struct i915_scheduler_stats *stats = scheduler->stats;
+	struct i915_scheduler_stats_nodes node_stats[I915_NUM_RINGS];
+	struct intel_engine_cs *ring;
+	char   str[50 * (I915_NUM_RINGS + 1)], name[50], *ptr;
+	int ret, i, r;
+
+	ret = mutex_lock_interruptible(&dev->mode_config.mutex);
+	if (ret)
+		return ret;
+
+#define PRINT_VAR(name, fmt, var)					\
+	do {								\
+		sprintf(str, "%-22s", name);				\
+		ptr = str + strlen(str);				\
+		for_each_ring(ring, dev_priv, r) {			\
+			sprintf(ptr, " %10" fmt, var);			\
+			ptr += strlen(ptr);				\
+		}							\
+		seq_printf(m, "%s\n", str);				\
+	} while (0)
+
+	PRINT_VAR("Ring name:",           "s", dev_priv->ring[r].name);
+	PRINT_VAR("  Ring seqno",         "d", ring->get_seqno(ring, false));
+	seq_putc(m, '\n');
+
+	seq_puts(m, "Batch submissions:\n");
+	PRINT_VAR("  Queued",             "u", stats[r].queued);
+	PRINT_VAR("  Submitted",          "u", stats[r].submitted);
+	PRINT_VAR("  Completed",          "u", stats[r].completed);
+	PRINT_VAR("  Expired",            "u", stats[r].expired);
+	seq_putc(m, '\n');
+
+	seq_puts(m, "Flush counts:\n");
+	PRINT_VAR("  By object",          "u", stats[r].flush_obj);
+	PRINT_VAR("  By request",         "u", stats[r].flush_req);
+	PRINT_VAR("  Blanket",            "u", stats[r].flush_all);
+	PRINT_VAR("  Entries bumped",     "u", stats[r].flush_bump);
+	PRINT_VAR("  Entries submitted",  "u", stats[r].flush_submit);
+	seq_putc(m, '\n');
+
+	seq_puts(m, "Miscellaneous:\n");
+	PRINT_VAR("  ExecEarly retry",    "u", stats[r].exec_early);
+	PRINT_VAR("  ExecFinal requeue",  "u", stats[r].exec_again);
+	PRINT_VAR("  ExecFinal killed",   "u", stats[r].exec_dead);
+	PRINT_VAR("  Fence wait",         "u", stats[r].fence_wait);
+	PRINT_VAR("  Fence wait again",   "u", stats[r].fence_again);
+	PRINT_VAR("  Fence wait ignore",  "u", stats[r].fence_ignore);
+	PRINT_VAR("  Fence supplied",     "u", stats[r].fence_got);
+	PRINT_VAR("  Hung flying",        "u", stats[r].kill_flying);
+	PRINT_VAR("  Hung queued",        "u", stats[r].kill_queued);
+	seq_putc(m, '\n');
+
+	seq_puts(m, "Queue contents:\n");
+	for_each_ring(ring, dev_priv, i)
+		i915_scheduler_query_stats(ring, node_stats + ring->id);
+
+	for (i = 0; i < (i915_sqs_MAX + 1); i++) {
+		sprintf(name, "  %s", i915_scheduler_queue_status_str(i));
+		PRINT_VAR(name, "d", node_stats[r].counts[i]);
+	}
+	seq_putc(m, '\n');
+
+#undef PRINT_VAR
+
+	mutex_unlock(&dev->mode_config.mutex);
+
+	return 0;
+}
+
 struct pipe_crc_info {
const char *name;
struct drm_device *dev;
@@ -5250,6 +5325,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
	{"i915_semaphore_status", i915_semaphore_status, 0},
	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
	{"i915_dp_mst_info", i915_dp_mst_info, 0},
+	{"i915_scheduler_info", i915_scheduler_info, 0},
	{"i915_wa_registers", i915_wa_registers, 0},

[Intel-gfx] [RFC 28/39] drm/i915: Connecting execbuff fences to scheduler

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The scheduler now supports sync framework fences being associated with
batch buffers. The execbuff IOCTL allows such fences to be passed in
from user land. This patch wires the two together so that the IOCTL no
longer needs to stall on the fence immediately. Instead the stall is
now swallowed by the scheduler's scheduling algorithm.

For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 21 -
 drivers/gpu/drm/i915/i915_scheduler.c  |  3 +++
 drivers/gpu/drm/i915/i915_scheduler.h  |  5 +
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1642701..1325b19 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1612,7 +1612,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/*
 * Without a GPU scheduler, any fence waits must be done up front.
 */
-	if (args->flags & I915_EXEC_WAIT_FENCE) {
+	if ((args->flags & I915_EXEC_WAIT_FENCE) &&
+	    (i915.scheduler_override & i915_so_direct_submit))
+	{
		ret = i915_early_fence_wait(ring, fd_fence_wait);
		if (ret < 0)
			return ret;
@@ -1799,6 +1801,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
params-ctx = ctx;
 
 #ifdef CONFIG_SYNC
+	if (args->flags & I915_EXEC_WAIT_FENCE) {
+		if (fd_fence_wait < 0) {
+			DRM_ERROR("Wait fence for ring %d has invalid id %d\n",
+				  (int) ring->id, fd_fence_wait);
+		} else {
+			params->fence_wait = sync_fence_fdget(fd_fence_wait);
+			if (params->fence_wait == NULL)
+				DRM_ERROR("Invalid wait fence %d\n",
+					  fd_fence_wait);
+		}
+	}
+
	if (args->flags & I915_EXEC_CREATE_FENCE) {
/*
 * Caller has requested a sync fence.
@@ -1865,6 +1879,11 @@ err:
		i915_gem_context_unreference(params->ctx);
	}
 
+#ifdef CONFIG_SYNC
+	if (params->fence_wait)
+		sync_fence_put(params->fence_wait);
+#endif
+
/*
 * If the request was created but not successfully submitted then it
 * must be freed again. If it was submitted then it is being tracked
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
b/drivers/gpu/drm/i915/i915_scheduler.c
index 19577c9..66dbc20 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -977,6 +977,9 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring,
			signalled = atomic_read(&node->params.fence_wait->status) != 0;
		else
			signalled = true;
+
+		if (!signalled)
+			signalled = i915_safe_to_ignore_fence(ring, node->params.fence_wait);
 #endif // CONFIG_SYNC
 
has_local  = false;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h 
b/drivers/gpu/drm/i915/i915_scheduler.h
index 8ca4b4b..3f94512 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -110,6 +110,11 @@ int i915_scheduler_queue_execbuffer(struct 
i915_scheduler_queue_entry *q
 int i915_scheduler_handle_irq(struct intel_engine_cs *ring);
 voidi915_scheduler_kill_all(struct drm_device *dev);
 voidi915_gem_scheduler_work_handler(struct work_struct *work);
+#ifdef CONFIG_SYNC
+struct drm_i915_gem_request *i915_scheduler_find_by_sync_value(struct intel_engine_cs *ring,
+							       struct intel_context *ctx,
+							       uint32_t sync_value);
+#endif
 int i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked);
 int i915_scheduler_flush_request(struct drm_i915_gem_request *req,
 bool is_locked);
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 22/39] drm/i915: Add scheduler hook to GPU reset

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

When the watchdog resets the GPU, the scheduler needs to know so that it can
clean up its view of the world. All in-flight nodes must be marked as dead so
that the scheduler does not wait forever for them to complete. Also, all queued
nodes must be marked as dead so that the scheduler does not deadlock the reset
code by claiming that the ring cannot be idled and must be revisited later.

Change-Id: I184eb59c5c1a1385f9c17db66c7cc46f8904eebd
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c   |  2 ++
 drivers/gpu/drm/i915/i915_scheduler.c | 63 ---
 drivers/gpu/drm/i915/i915_scheduler.h |  8 -
 3 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6142e68..6d72caa 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3187,6 +3187,8 @@ void i915_gem_reset(struct drm_device *dev)
struct intel_engine_cs *ring;
int i;
 
+   i915_scheduler_kill_all(dev);
+
/*
 * Before we free the objects from the requests, we need to inspect
 * them for finding the guilty party. As the requests only borrow
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
b/drivers/gpu/drm/i915/i915_scheduler.c
index 73c9ba6..3155f42 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -341,6 +341,56 @@ static void i915_scheduler_node_kill(struct i915_scheduler_queue_entry *node)
	node->status = i915_sqs_dead;
 }
 
+/* Abandon a queued node completely. For example because the driver is being
+ * reset and it is not valid to preserve absolutely any state at all across the
+ * reinitialisation sequence. */
+static void i915_scheduler_node_kill_queued(struct i915_scheduler_queue_entry *node)
+{
+	BUG_ON(!node);
+	BUG_ON(!I915_SQS_IS_QUEUED(node));
+
+	node->status = i915_sqs_dead;
+}
+
+/* The system is toast. Terminate all nodes with extreme prejudice. */
+void i915_scheduler_kill_all(struct drm_device *dev)
+{
+	struct i915_scheduler_queue_entry   *node;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_scheduler   *scheduler = dev_priv->scheduler;
+	unsigned long   flags;
+	int r;
+
+	spin_lock_irqsave(&scheduler->lock, flags);
+
+	for (r = 0; r < I915_NUM_RINGS; r++) {
+		list_for_each_entry(node, &scheduler->node_queue[r], link) {
+			switch (node->status) {
+			case I915_SQS_CASE_COMPLETE:
+				break;
+
+			case I915_SQS_CASE_FLYING:
+				i915_scheduler_node_kill(node);
+				break;
+
+			case I915_SQS_CASE_QUEUED:
+				i915_scheduler_node_kill_queued(node);
+				break;
+
+			default:
+				/* Wot no state?! */
+				BUG();
+			}
+		}
+	}
+
+	memset(scheduler->last_irq_seqno, 0x00, sizeof(scheduler->last_irq_seqno));
+
+	spin_unlock_irqrestore(&scheduler->lock, flags);
+
+	queue_work(dev_priv->wq, &dev_priv->mm.scheduler_work);
+}
+
 /*
 * The batch tagged with the indicated sequence number has completed.
  * Search the queue for it, update its status and those of any batches
@@ -912,7 +962,7 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
	scheduler->flags[ring->id] &= ~i915_sf_submitting;
 
if (ret) {
-   bool requeue = true;
+   int requeue = 1;
 
/* Oh dear! Either the node is broken or the ring is
 * busy. So need to kill the node or requeue it and try
@@ -922,7 +972,7 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
case ENODEV:
case ENOENT:
/* Fatal errors. Kill the node. */
-   requeue = false;
+   requeue = -1;
break;
 
case EAGAIN:
@@ -941,13 +991,18 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
break;
}
 
-   if (requeue) {
+		/* Check that the watchdog/reset code has not nuked
+		 * the node while we weren't looking: */
+		if (node->status == i915_sqs_dead)
+			requeue = 0;
+
+   if (requeue == 1) {
i915_scheduler_node_requeue(node);
 

[Intel-gfx] [RFC 11/39] drm/i915: Force MMIO flips when scheduler enabled

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

Change-Id: Ice071af6d88306b0d1c53bdb651a1a3e20bdc1af
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/intel_display.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index b9e8113..9629dab 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -44,6 +44,7 @@
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_rect.h>
 #include <linux/dma_remapping.h>
+#include "i915_scheduler.h"
 
 /* Primary plane formats for gen <= 3 */
 static const uint32_t i8xx_primary_formats[] = {
@@ -11180,6 +11181,8 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
return true;
else if (i915.enable_execlists)
return true;
+	else if (i915_scheduler_is_enabled(ring->dev))
+		return true;
	else
		return ring != i915_gem_request_get_ring(obj->last_write_req);
 }
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 02/39] drm/i915: Updating assorted register and status page definitions

2015-07-17 Thread John . C . Harrison
From: Dave Gordon david.s.gor...@intel.com

Added various definitions that will be useful for the scheduler in general and
pre-emptive context switching in particular.

Change-Id: Ica805b94160426def51f5d520f5ce51c60864a98
For: VIZ-1587
Signed-off-by: Dave Gordon david.s.gor...@intel.com
---
 drivers/gpu/drm/i915/i915_reg.h | 30 -
 drivers/gpu/drm/i915/intel_ringbuffer.h | 40 +++--
 2 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e9a95df..ae3e9f7 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -250,6 +250,10 @@
 #define   MI_GLOBAL_GTT			(1<<22)
 
 #define MI_NOOP				MI_INSTR(0, 0)
+#define   MI_NOOP_WRITE_ID		(1<<22)
+#define   MI_NOOP_ID_MASK		((1<<22) - 1)
+#define   MI_NOOP_MID(id)		((id) & MI_NOOP_ID_MASK)
+#define MI_NOOP_WITH_ID(id)		MI_INSTR(0, MI_NOOP_WRITE_ID|MI_NOOP_MID(id))
 #define MI_USER_INTERRUPT		MI_INSTR(0x02, 0)
 #define MI_WAIT_FOR_EVENT		MI_INSTR(0x03, 0)
 #define   MI_WAIT_FOR_OVERLAY_FLIP	(1<<16)
@@ -267,6 +271,7 @@
 #define MI_ARB_ON_OFF			MI_INSTR(0x08, 0)
 #define   MI_ARB_ENABLE			(1<<0)
 #define   MI_ARB_DISABLE		(0<<0)
+#define MI_ARB_CHECK			MI_INSTR(0x05, 0)
 #define MI_BATCH_BUFFER_END		MI_INSTR(0x0a, 0)
 #define MI_SUSPEND_FLUSH		MI_INSTR(0x0b, 0)
 #define   MI_SUSPEND_FLUSH_EN		(1<<0)
@@ -316,6 +321,8 @@
 #define   MI_SEMAPHORE_SYNC_INVALID	(3<<16)
 #define   MI_SEMAPHORE_SYNC_MASK	(3<<16)
 #define MI_SET_CONTEXT			MI_INSTR(0x18, 0)
+#define   MI_CONTEXT_ADDR_MASK		((~0)<<12)
+#define   MI_SET_CONTEXT_FLAG_MASK	((1<<12)-1)
 #define   MI_MM_SPACE_GTT		(1<<8)
 #define   MI_MM_SPACE_PHYSICAL		(0<<8)
 #define   MI_SAVE_EXT_STATE_EN		(1<<3)
@@ -335,6 +342,10 @@
 #define   MI_USE_GGTT			(1 << 22) /* g4x+ */
 #define MI_STORE_DWORD_INDEX		MI_INSTR(0x21, 1)
 #define   MI_STORE_DWORD_INDEX_SHIFT	2
+#define MI_STORE_REG_MEM		MI_INSTR(0x24, 1)
+#define   MI_STORE_REG_MEM_GTT		(1 << 22)
+#define   MI_STORE_REG_MEM_PREDICATE	(1 << 21)
+
 /* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:
  * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw
  *   simply ignores the register load under certain conditions.
@@ -349,7 +360,10 @@
 #define MI_FLUSH_DW			MI_INSTR(0x26, 1) /* for GEN6 */
 #define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
 #define   MI_INVALIDATE_TLB		(1<<18)
+#define   MI_FLUSH_DW_OP_NONE		(0<<14)
 #define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
+#define   MI_FLUSH_DW_OP_RSVD		(2<<14)
+#define   MI_FLUSH_DW_OP_STAMP		(3<<14)
 #define   MI_FLUSH_DW_OP_MASK		(3<<14)
 #define   MI_FLUSH_DW_NOTIFY		(1<<8)
 #define   MI_INVALIDATE_BSD		(1<<7)
@@ -1491,6 +1505,19 @@ enum skl_disp_power_wells {
 
 #define HSW_GTT_CACHE_EN   0x4024
 #define   GTT_CACHE_EN_ALL 0xF0007FFF
+
+/*
+ * Preemption-related registers
+ */
+#define RING_UHPTR(base)   ((base)+0x134)
+#define   UHPTR_GFX_ADDR_ALIGN (0x7)
+#define   UHPTR_VALID  (0x1)
+#define RING_PREEMPT_ADDR  0x0214c
+#define   PREEMPT_BATCH_LEVEL_MASK (0x3)
+#define BB_PREEMPT_ADDR0x02148
+#define SBB_PREEMPT_ADDR   0x0213c
+#define RS_PREEMPT_STATUS  0x0215c
+
 #define GEN7_WR_WATERMARK  0x4028
 #define GEN7_GFX_PRIO_CTRL 0x402C
 #define ARB_MODE   0x4030
@@ -6612,7 +6639,8 @@ enum skl_disp_power_wells {
 #define  VLV_SPAREG2H  0xA194
 
 #define  GTFIFODBG				0x120000
-#define    GT_FIFO_SBDROPERR			(1<<6)
+#define    GT_FIFO_CPU_ERROR_MASK		0xf
+#define    GT_FIFO_SDDROPERR			(1<<6)
 #define    GT_FIFO_BLOBDROPERR			(1<<5)
 #define    GT_FIFO_SB_READ_ABORTERR		(1<<4)
 #define    GT_FIFO_DROPERR			(1<<3)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2e68b73..9457774 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -49,6 +49,12 @@ struct  intel_hw_status_page {
 #define I915_READ_MODE(ring) I915_READ(RING_MI_MODE((ring)->mmio_base))
 #define I915_WRITE_MODE(ring, val) I915_WRITE(RING_MI_MODE((ring)->mmio_base), val)
 
+#define I915_READ_UHPTR(ring) \
+		I915_READ(RING_UHPTR((ring)->mmio_base))
+#define I915_WRITE_UHPTR(ring, val) \
+		I915_WRITE(RING_UHPTR((ring)->mmio_base), val)
+#define I915_READ_NOPID(ring) I915_READ(RING_NOPID((ring)->mmio_base))
+
 /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  */
@@ -415,10 +421,40 @@ intel_write_status_page(struct 

[Intel-gfx] [RFC 21/39] drm/i915: Added scheduler flush calls to ring throttle and idle functions

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

When requesting that all GPU work is completed, it is now necessary to get the
scheduler involved in order to flush out work that is queued but not yet submitted.
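
The resulting usage pattern, sketched below as it appears in the idle path (the
mutex is already held there, hence is_locked = true):

	/* Drain the scheduler's software queue before waiting for the GPU */
	for_each_ring(ring, dev_priv, i) {
		ret = i915_scheduler_flush(ring, true);
		if (ret < 0)
			return ret;
	}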

Change-Id: I95dcc2a2ee5c1a844748621c333994ddd6cf6a66
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c   | 17 -
 drivers/gpu/drm/i915/i915_scheduler.c | 45 +++
 drivers/gpu/drm/i915/i915_scheduler.h |  1 +
 3 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dd9ebbe..6142e68 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3710,6 +3710,10 @@ int i915_gpu_idle(struct drm_device *dev)
 
/* Flush everything onto the inactive list. */
for_each_ring(ring, dev_priv, i) {
+   ret = i915_scheduler_flush(ring, true);
+		if (ret < 0)
+   return ret;
+
if (!i915.enable_execlists) {
struct drm_i915_gem_request *req;
 
@@ -4679,7 +4683,8 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
struct drm_i915_gem_request *request, *target = NULL;
unsigned reset_counter;
-   int ret;
+   int i, ret;
+   struct intel_engine_cs *ring;
 
ret = i915_gem_wait_for_error(dev_priv-gpu_error);
if (ret)
@@ -4689,6 +4694,16 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
if (ret)
return ret;
 
+	for_each_ring(ring, dev_priv, i) {
+		/* Need a mechanism to flush out scheduler entries that were
+		 * submitted more than 'recent_enough' time ago as well! In the
+		 * meantime, just flush everything out to ensure that entries
+		 * can not sit around indefinitely. */
+		ret = i915_scheduler_flush(ring, false);
+		if (ret < 0)
+			return ret;
+	}
+
 	spin_lock(&file_priv->mm.lock);
 	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
 		if (time_after_eq(request->emitted_jiffies, recent_enough))
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 811cbe4..73c9ba6 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -653,6 +653,51 @@ int i915_scheduler_flush_request(struct drm_i915_gem_request *req,
return flush_count;
 }
 
+int i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked)
+{
+	struct i915_scheduler_queue_entry *node;
+	struct drm_i915_private           *dev_priv;
+	struct i915_scheduler             *scheduler;
+	unsigned long                     flags;
+	bool                              found;
+	int                               ret;
+	uint32_t                          count = 0;
+
+	if (!ring)
+		return -EINVAL;
+
+	dev_priv  = ring->dev->dev_private;
+	scheduler = dev_priv->scheduler;
+
+	if (!scheduler)
+		return 0;
+
+	BUG_ON(is_locked && (scheduler->flags[ring->id] & i915_sf_submitting));
+
+	do {
+		found = false;
+		spin_lock_irqsave(&scheduler->lock, flags);
+		list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
+			if (!I915_SQS_IS_QUEUED(node))
+				continue;
+
+			found = true;
+			break;
+		}
+		spin_unlock_irqrestore(&scheduler->lock, flags);
+
+		if (found) {
+			ret = i915_scheduler_submit(ring, is_locked);
+			if (ret < 0)
+				return ret;
+
+			count += ret;
+		}
+	} while (found);
+
+	return count;
+}
+
+
 static void i915_scheduler_priority_bump_clear(struct i915_scheduler *scheduler)
 {
struct i915_scheduler_queue_entry *node;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index fcf2640..5e094d5 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -92,6 +92,7 @@ void        i915_gem_scheduler_clean_node(struct i915_scheduler_queue_entry *node);
 int         i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
 int         i915_scheduler_handle_irq(struct intel_engine_cs *ring);
 void        i915_gem_scheduler_work_handler(struct work_struct *work);
+int         i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked);
 int         i915_scheduler_flush_request(struct drm_i915_gem_request *req,
                                          bool is_locked);
 bool        i915_scheduler_is_request_tracked(struct drm_i915_gem_request

[Intel-gfx] [RFC 04/39] drm/i915: Prelude to splitting i915_gem_do_execbuffer in two

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The scheduler decouples the submission of batch buffers to the driver from their
submission to the hardware. This basically means splitting the execbuffer()
function in half. This change rearranges some code ready for the split to occur.
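
A rough sketch of the intended shape after the split, using the _final() naming
that later patches in this series adopt; details are illustrative only:

	/* Front half - IOCTL time: validate buffers, capture all state */
	ret = i915_gem_ringbuffer_submission(params, args, vmas);

	/* Back half - hardware submission time, possibly much later:
	 * invalidate caches, switch context, dispatch the batch.
	 */
	ret = i915_gem_ringbuffer_submission_final(params);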

Change-Id: Icc9c8afaac18821f3eb8a151a49f918f90c068a3
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 57 ++
 drivers/gpu/drm/i915/intel_lrc.c   | 18 +++---
 2 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d95d472..988ecd4 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -926,10 +926,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 	if (flush_domains & I915_GEM_DOMAIN_GTT)
 		wmb();
 
-	/* Unconditionally invalidate gpu caches and ensure that we do flush
-	 * any residual writes from the previous batch.
-	 */
-	return intel_ring_invalidate_all_caches(req);
+	return 0;
 }
 
 static bool
@@ -1253,17 +1250,6 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
}
}
 
-	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
-	if (ret)
-		goto error;
-
-	ret = i915_switch_context(params->request);
-	if (ret)
-		goto error;
-
-	WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
-	     "%s didn't clear reload\n", ring->name);
-
 	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
 	instp_mask = I915_EXEC_CONSTANTS_MASK;
 	switch (instp_mode) {
@@ -1301,6 +1287,32 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
goto error;
}
 
+	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
+	if (ret)
+		goto error;
+
+	i915_gem_execbuffer_move_to_active(vmas, params->request);
+
+	/* To be split into two functions here... */
+
+	intel_runtime_pm_get(dev_priv);
+
+	/*
+	 * Unconditionally invalidate gpu caches and ensure that we do flush
+	 * any residual writes from the previous batch.
+	 */
+	ret = intel_ring_invalidate_all_caches(params->request);
+	if (ret)
+		goto error;
+
+	/* Switch to the correct context for the batch */
+	ret = i915_switch_context(params->request);
+	if (ret)
+		goto error;
+
+	WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
+	     "%s didn't clear reload\n", ring->name);
+
 	if (ring == dev_priv->ring[RCS] &&
 	    instp_mode != dev_priv->relative_constants_mode) {
 		ret = intel_ring_begin(params->request, 4);
@@ -1344,15 +1356,20 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
 					exec_start, exec_len,
 					params->dispatch_flags);
 		if (ret)
-			return ret;
+			goto error;
 	}
 
 	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
 
-	i915_gem_execbuffer_move_to_active(vmas, params->request);
 	i915_gem_execbuffer_retire_commands(params);
 
 error:
+	/*
+	 * intel_gpu_busy should also get a ref, so it will free when the device
+	 * is really idle.
+	 */
+	intel_runtime_pm_put(dev_priv);
+
 	kfree(cliprects);
 	return ret;
 }
@@ -1563,8 +1580,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
 #endif
 
-   intel_runtime_pm_get(dev_priv);
-
ret = i915_mutex_lock_interruptible(dev);
if (ret)
goto pre_mutex_err;
@@ -1759,10 +1774,6 @@ err:
 	mutex_unlock(&dev->struct_mutex);
 
 pre_mutex_err:
-	/* intel_gpu_busy should also get a ref, so it will free when the device
-	 * is really idle. */
-	intel_runtime_pm_put(dev_priv);
-
 	if (fd_fence_complete != -1) {
 		sys_close(fd_fence_complete);
 		args->rsvd2 = (__u64) -1;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8aa9a18..89f3bcd 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -613,10 +613,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
 	if (flush_domains & I915_GEM_DOMAIN_GTT)
 		wmb();
 
-	/* Unconditionally invalidate gpu caches and ensure that we do flush
-	 * any residual writes from the previous batch.
-	 */
-	return logical_ring_invalidate_all_caches(req);
+	return 0;
 }
 
 int intel_logical_ring_alloc_request_extras(struct 

[Intel-gfx] [RFC 19/39] drm/i915: Added scheduler support to __wait_request() calls

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The scheduler can cause batch buffers, and hence requests, to be submitted to
the ring out of order and asynchronously to their submission to the driver. Thus
at the point of waiting for the completion of a given request, it is not even
guaranteed that the request has actually been sent to the hardware yet. Even if
it has been sent, it is possible that it could be pre-empted and thus 'unsent'.

This means that it is necessary to be able to submit requests to the hardware
during the wait call itself. Unfortunately, while some callers of
__wait_request() release the mutex lock first, others do not (and apparently can
not). Hence there is the ability to deadlock as the wait stalls for submission
but the asynchronous submission is stalled for the mutex lock.

This change hooks the scheduler in to the __wait_request() code to ensure
correct behaviour. That is, flush the target batch buffer through to the
hardware and do not deadlock waiting for something that cannot currently be
submitted. Instead, the wait call must return EAGAIN at least as far back as
necessary to release the mutex lock and allow the scheduler's asynchronous
processing to get in and handle the pre-emption operation and eventually
(re-)submit the work.
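
A hedged sketch of the resulting caller pattern; the back-off details are
illustrative, not part of this patch:

retry:
	ret = __i915_wait_request(req, reset_counter, true, NULL, NULL, true);
	if (ret == -EAGAIN) {
		/* The scheduler needs struct_mutex to progress this request,
		 * so drop the lock, let the asynchronous submission run and
		 * then try again.
		 */
		mutex_unlock(&dev->struct_mutex);
		cond_resched();
		mutex_lock(&dev->struct_mutex);
		goto retry;
	}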

Change-Id: I31fe6bc7e38f6ffdd843fcae16e7cc8b1e52a931
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h |  3 +-
 drivers/gpu/drm/i915/i915_gem.c | 37 +++---
 drivers/gpu/drm/i915/i915_scheduler.c   | 91 +
 drivers/gpu/drm/i915/i915_scheduler.h   |  2 +
 drivers/gpu/drm/i915/intel_display.c|  3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  2 +-
 6 files changed, 128 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2b3fab6..e9e0736 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2972,7 +2972,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
unsigned reset_counter,
bool interruptible,
s64 *timeout,
-   struct intel_rps_client *rps);
+   struct intel_rps_client *rps,
+   bool is_locked);
 int __must_check i915_wait_request(struct drm_i915_gem_request *req);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cb5af5d..f713cda 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1219,7 +1219,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
unsigned reset_counter,
bool interruptible,
s64 *timeout,
-   struct intel_rps_client *rps)
+   struct intel_rps_client *rps,
+   bool is_locked)
 {
 	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
 	struct drm_device *dev = ring->dev;
@@ -1229,8 +1230,10 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	DEFINE_WAIT(wait);
 	unsigned long timeout_expire;
 	s64 before, now;
-	int ret;
+	int ret = 0;
+	bool busy;
 
+	might_sleep();
 	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
 
 	if (list_empty(&req->list))
@@ -1281,6 +1284,22 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
break;
}
 
+		if (is_locked) {
+			/* If this request is being processed by the scheduler
+			 * then it is unsafe to sleep with the mutex lock held
+			 * as the scheduler may require the lock in order to
+			 * progress the request. */
+			if (i915_scheduler_is_request_tracked(req, NULL, &busy)) {
+				if (busy) {
+					ret = -EAGAIN;
+					break;
+				}
+			}
+
+			/* If the request is not tracked by the scheduler then
+			 * the regular test can be done. */
+		}
+
 		if (i915_gem_request_completed(req)) {
 			ret = 0;
 			break;
@@ -1452,13 +1471,17 @@ i915_wait_request(struct drm_i915_gem_request *req)
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 
+	ret = i915_scheduler_flush_request(req, true);
+	if (ret < 0)
+		return ret;
+
 	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
 	if (ret)
 		return ret;
 
 	ret = __i915_wait_request(req,
  

[Intel-gfx] [RFC 23/39] drm/i915: Added a module parameter for allowing scheduler overrides

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

It can be useful to be able to disable certain features (e.g. the entire
scheduler) via a module parameter for debugging purposes. A module parameter has
the advantage of not being a compile-time switch, while also not implying that
the value can be changed dynamically at runtime.
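
Because the parameter is a mask, further debug overrides can sit alongside
direct submission; a sketch (i915_so_submit_on_queue is added by a later patch
in this series):

	/* Options for 'scheduler_override' module parameter: */
	enum {
		i915_so_direct_submit   = (1 << 0),
		i915_so_submit_on_queue = (1 << 1),
	};

	/* Each feature tests its own bit independently: */
	if (i915.scheduler_override & i915_so_submit_on_queue)
		not_flying = true;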

Change-Id: I92f4c832be88f5b34b49b90d6a9903fac68f7004
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h   | 1 +
 drivers/gpu/drm/i915/i915_params.c| 4 
 drivers/gpu/drm/i915/i915_scheduler.c | 7 +--
 drivers/gpu/drm/i915/i915_scheduler.h | 5 +
 4 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e9e0736..30552cc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2664,6 +2664,7 @@ struct i915_params {
bool verbose_state_checks;
bool nuclear_pageflip;
int edp_vswing;
+   int scheduler_override;
 };
 extern struct i915_params i915 __read_mostly;
 
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 7983fe4..a5320ff 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -53,6 +53,7 @@ struct i915_params i915 __read_mostly = {
.verbose_state_checks = 1,
.nuclear_pageflip = 0,
.edp_vswing = 0,
+   .scheduler_override = 1,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -186,3 +187,6 @@ MODULE_PARM_DESC(edp_vswing,
 		 "Ignore/Override vswing pre-emph table selection from VBT "
 		 "(0=use value from vbt [default], 1=low power swing(200mV),"
 		 "2=default swing(400mV))");
+
+module_param_named(scheduler_override, i915.scheduler_override, int, 0600);
+MODULE_PARM_DESC(scheduler_override, "Scheduler override mask (0 = none, 1 = direct submission [default])");
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 3155f42..224c8b4 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -44,6 +44,9 @@ bool i915_scheduler_is_enabled(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	if (i915.scheduler_override & i915_so_direct_submit)
+		return false;
+
 	return dev_priv->scheduler != NULL;
 }
 
@@ -92,7 +95,7 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 
 	BUG_ON(!scheduler);
 
-	if (1/*i915.scheduler_override & i915_so_direct_submit*/) {
+	if (i915.scheduler_override & i915_so_direct_submit) {
 		int ret;
 
 		qe->scheduler_index = scheduler->index++;
@@ -466,7 +469,7 @@ int i915_scheduler_handle_irq(struct intel_engine_cs *ring)
 
 	seqno = ring->get_seqno(ring, false);
 
-	if (1/*i915.scheduler_override & i915_so_direct_submit*/)
+	if (i915.scheduler_override & i915_so_direct_submit)
 		return 0;
 
 	if (seqno == scheduler->last_irq_seqno[ring->id]) {
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index b440e62..7d743c9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -89,6 +89,11 @@ enum {
 	i915_sf_submitting          = (1 << 1),
 };
 
+/* Options for 'scheduler_override' module parameter: */
+enum {
+	i915_so_direct_submit       = (1 << 0),
+};
+
+
 booli915_scheduler_is_enabled(struct drm_device *dev);
 int i915_scheduler_init(struct drm_device *dev);
 int i915_scheduler_closefile(struct drm_device *dev,
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 01/39] drm/i915: Add total count to context status debugfs output

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

When there are lots and lots and even more lots of contexts (e.g. when running
with execlists) it is useful to be able to immediately see what the total
context count is.

Change-Id: If9726d4df86567100ecf53867b43f4753f08bf84
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c50a798..05646fe 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1942,6 +1942,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
 	struct intel_context *ctx;
+	uint32_t count = 0;
 	int ret, i;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -1955,6 +1956,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
 
 		seq_puts(m, "HW context ");
 		describe_ctx(m, ctx);
+		count++;
 		for_each_ring(ring, dev_priv, i) {
 			if (ring->default_context == ctx)
 				seq_printf(m, "(default context %s) ",
@@ -1983,6 +1985,8 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		seq_putc(m, '\n');
 	}
 
+	seq_printf(m, "Total: %d contexts\n", count);
+
 	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 37/39] drm/i915: GPU priority bumping to prevent starvation

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

If a high priority task was to continuously submit batch buffers to the driver,
it could starve out any lower priority task from getting any GPU time at all. To
prevent this, the priority of a queued batch buffer is bumped each time it does
not get submitted to the hardware.
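
As a worked example with the defaults set below: a bump of 50 per scan against
the default pre-emption threshold of 900 means a node queued at priority zero
crosses the threshold after 18 unsuccessful submission passes, so starvation is
bounded. A sketch of a saturating bump; the priority field and clamp are
illustrative:

	static void bump_priority(struct i915_scheduler_queue_entry *node,
				  uint32_t bump)
	{
		/* Saturate rather than wrap the unsigned priority */
		if (node->priority + bump < node->priority)
			node->priority = ~0U;
		else
			node->priority += bump;
	}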

Change-Id: I0319c7d2f306c61a283f03edda9b5d09a6d3b621
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c   | 28 
 drivers/gpu/drm/i915/i915_scheduler.c | 14 ++
 drivers/gpu/drm/i915/i915_scheduler.h |  1 +
 3 files changed, 43 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3c5c750..509668f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1152,6 +1152,33 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_scheduler_priority_max_fops,
 			"0x%llx\n");
 
 static int
+i915_scheduler_priority_bump_get(void *data, u64 *val)
+{
+	struct drm_device       *dev       = data;
+	struct drm_i915_private *dev_priv  = dev->dev_private;
+	struct i915_scheduler   *scheduler = dev_priv->scheduler;
+
+	*val = (u64) scheduler->priority_level_bump;
+	return 0;
+}
+
+static int
+i915_scheduler_priority_bump_set(void *data, u64 val)
+{
+	struct drm_device       *dev       = data;
+	struct drm_i915_private *dev_priv  = dev->dev_private;
+	struct i915_scheduler   *scheduler = dev_priv->scheduler;
+
+	scheduler->priority_level_bump = (u32) val;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_scheduler_priority_bump_fops,
+			i915_scheduler_priority_bump_get,
+			i915_scheduler_priority_bump_set,
+			"0x%llx\n");
+
+static int
 i915_scheduler_priority_preempt_get(void *data, u64 *val)
 {
struct drm_device   *dev   = data;
@@ -5349,6 +5376,7 @@ static const struct i915_debugfs_files {
 	{"i915_error_state", &i915_error_state_fops},
 	{"i915_next_seqno", &i915_next_seqno_fops},
 	{"i915_scheduler_priority_max", &i915_scheduler_priority_max_fops},
+	{"i915_scheduler_priority_bump", &i915_scheduler_priority_bump_fops},
 	{"i915_scheduler_priority_preempt", &i915_scheduler_priority_preempt_fops},
 	{"i915_scheduler_min_flying", &i915_scheduler_min_flying_fops},
 	{"i915_scheduler_file_queue_max", &i915_scheduler_file_queue_max_fops},
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 631f4e6..8de3f0b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -191,6 +191,7 @@ int i915_scheduler_init(struct drm_device *dev)
 
 	/* Default tuning values: */
 	scheduler->priority_level_max     = ~0U;
+	scheduler->priority_level_bump    = 50;
 	scheduler->priority_level_preempt = 900;
 	scheduler->min_flying             = 2;
 	scheduler->file_queue_max         = 64;
@@ -1568,6 +1569,19 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
 		ret = i915_scheduler_pop_from_queue_locked(ring, &node, &flags);
 	} while (ret == 0);
 
+	/*
+	 * Bump the priority of everything that was not submitted to prevent
+	 * starvation of low priority tasks by a spamming high priority task.
+	 */
+	i915_scheduler_priority_bump_clear(scheduler);
+	list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
+		if (!I915_SQS_IS_QUEUED(node))
+			continue;
+
+		i915_scheduler_priority_bump(scheduler, node,
+					     scheduler->priority_level_bump);
+	}
+
 	spin_unlock_irqrestore(&scheduler->lock, flags);
 
 	if (!was_locked)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 2113e7d..8f3e42f 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -119,6 +119,7 @@ struct i915_scheduler {
 
 	/* Tuning parameters: */
 	uint32_t            priority_level_max;
+	uint32_t            priority_level_bump;
 	uint32_t            priority_level_preempt;
 	uint32_t            min_flying;
 	uint32_t            file_queue_max;
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 30/39] drm/i915: Added scheduler queue throttling by DRM file handle

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The scheduler decouples the submission of batch buffers to the driver from their
subsequent submission to the hardware. This means that an application which is
continuously submitting buffers as fast as it can could potentialy flood the
driver. To prevent this, the driver now tracks how many buffers are in progress
(queued in software or executing in hardware) and limits this to a given
(tunable) number. If this number is exceeded then the queue to the driver will
return EAGAIN and thus prevent the scheduler's queue becoming arbitrarily large.
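
The inc/dec helpers are truncated in this archive; a minimal sketch of the
bookkeeping they imply, assuming a plain per-file counter:

	static void i915_scheduler_file_queue_inc(struct drm_file *file)
	{
		struct drm_i915_file_private *file_priv = file->driver_priv;

		file_priv->scheduler_queue_length++;
	}

	static void i915_scheduler_file_queue_dec(struct drm_file *file)
	{
		struct drm_i915_file_private *file_priv = file->driver_priv;

		file_priv->scheduler_queue_length--;
	}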

Change-Id: I83258240aec7c810db08c006a3062d46aa91363f
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h|  2 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 +++
 drivers/gpu/drm/i915/i915_scheduler.c  | 34 ++
 drivers/gpu/drm/i915/i915_scheduler.h  |  2 ++
 4 files changed, 46 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b568432..e230632 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -334,6 +334,8 @@ struct drm_i915_file_private {
} rps;
 
struct intel_engine_cs *bsd_ring;
+
+   u32 scheduler_queue_length;
 };
 
 enum intel_dpll_id {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f90a2c8..c2a69d8 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1935,6 +1935,10 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	/* Throttle batch requests per device file */
+	if (i915_scheduler_file_queue_is_full(file))
+		return -EAGAIN;
+
 	/* Copy in the exec list from userland */
 	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
 	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
@@ -2018,6 +2022,10 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	/* Throttle batch requests per device file */
+	if (i915_scheduler_file_queue_is_full(file))
+		return -EAGAIN;
+
 	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
 			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
 	if (exec2_list == NULL)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 408bedc..f0c99ad 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -40,6 +40,8 @@ static void i915_scheduler_priority_bump_clear(struct i915_scheduler *scheduler);
 static int  i915_scheduler_priority_bump(struct i915_scheduler *scheduler,
                                          struct i915_scheduler_queue_entry *target,
                                          uint32_t bump);
+static void i915_scheduler_file_queue_inc(struct drm_file *file);
+static void i915_scheduler_file_queue_dec(struct drm_file *file);
 
 bool i915_scheduler_is_enabled(struct drm_device *dev)
 {
@@ -75,6 +77,7 @@ int i915_scheduler_init(struct drm_device *dev)
 	scheduler->priority_level_max     = ~0U;
 	scheduler->priority_level_preempt = 900;
 	scheduler->min_flying             = 2;
+	scheduler->file_queue_max         = 64;
 
dev_priv-scheduler = scheduler;
 
@@ -249,6 +252,8 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 
 	list_add_tail(&node->link, &scheduler->node_queue[ring->id]);
 
+	i915_scheduler_file_queue_inc(node->params.file);
+
 	if (i915.scheduler_override & i915_so_submit_on_queue)
 		not_flying = true;
 	else
@@ -630,6 +635,12 @@ static int i915_scheduler_remove(struct intel_engine_cs *ring)
 			/* Strip the dependency info while the mutex is still locked */
 			i915_scheduler_remove_dependent(scheduler, node);
 
+			/* Likewise clean up the file descriptor before it
+			 * might disappear. */
+			if (node->params.file) {
+				i915_scheduler_file_queue_dec(node->params.file);
+				node->params.file = NULL;
+			}
+
 			continue;
 		}
 
@@ -1330,3 +1341,26 @@ int i915_scheduler_closefile(struct drm_device *dev, struct drm_file *file)
 
 	return 0;
 }
+
+bool i915_scheduler_file_queue_is_full(struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_private      *dev_priv  = file_priv->dev_priv;
+	struct i915_scheduler        *scheduler = dev_priv->scheduler;
+
+	return file_priv->scheduler_queue_length >= scheduler->file_queue_max;
+}
+
+static void i915_scheduler_file_queue_inc(struct drm_file *file)
+{
+	struct

[Intel-gfx] [RFC 29/39] drm/i915: Added trace points to scheduler

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

Added trace points to the scheduler to track all the various events, node state
transitions and other interesting things that occur.
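
The tracepoint definitions live in i915_trace.h and are not quoted in full
below; as a sketch of the shape one such event might take (field choice is
illustrative):

	TRACE_EVENT(i915_scheduler_queue,
		    TP_PROTO(struct intel_engine_cs *ring,
			     struct i915_scheduler_queue_entry *node),
		    TP_ARGS(ring, node),

		    TP_STRUCT__entry(
				     __field(u32, ring)
				     __field(u32, uniq)
				     ),

		    TP_fast_assign(
				   __entry->ring = ring->id;
				   __entry->uniq = node->params.request->uniq;
				   ),

		    TP_printk("ring=%d, uniq=%d",
			      __entry->ring, __entry->uniq)
	);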

Change-Id: I9886390cfc7897bc1faf50a104bc651d8baed8a5
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   2 +
 drivers/gpu/drm/i915/i915_scheduler.c  |  34 -
 drivers/gpu/drm/i915/i915_trace.h  | 208 +
 drivers/gpu/drm/i915/intel_lrc.c   |   2 +
 4 files changed, 244 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1325b19..f90a2c8 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1291,6 +1291,8 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
 
 	i915_gem_execbuffer_move_to_active(vmas, params->request);
 
+	trace_i915_gem_ring_queue(ring, params);
+
 	qe = container_of(params, typeof(*qe), params);
 	ret = i915_scheduler_queue_execbuffer(qe);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 66dbc20..408bedc 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -101,6 +101,8 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 
 		qe->scheduler_index = scheduler->index++;
 
+		trace_i915_scheduler_queue(qe->params.ring, qe);
+
 		WARN_ON(qe->params.fence_wait &&
 			(atomic_read(&qe->params.fence_wait->status) == 0));
 
@@ -253,6 +255,9 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 	not_flying = i915_scheduler_count_flying(scheduler, ring) <
 						 scheduler->min_flying;
 
+	trace_i915_scheduler_queue(ring, node);
+	trace_i915_scheduler_node_state_change(ring, node);
+
 	spin_unlock_irqrestore(&scheduler->lock, flags);
 
 	if (not_flying)
@@ -280,6 +285,9 @@ static int i915_scheduler_fly_node(struct i915_scheduler_queue_entry *node)
 
 	node->status = i915_sqs_flying;
 
+	trace_i915_scheduler_fly(ring, node);
+	trace_i915_scheduler_node_state_change(ring, node);
+
 	if (!(scheduler->flags[ring->id] & i915_sf_interrupts_enabled)) {
 		bool	success = true;
 
@@ -344,6 +352,8 @@ static void i915_scheduler_node_requeue(struct i915_scheduler_queue_entry *node)
 	BUG_ON(!I915_SQS_IS_FLYING(node));
 
 	node->status = i915_sqs_queued;
+	trace_i915_scheduler_unfly(node->params.ring, node);
+	trace_i915_scheduler_node_state_change(node->params.ring, node);
 }
 
 /* Give up on a popped node completely. For example, because it is causing the
@@ -354,6 +364,8 @@ static void i915_scheduler_node_kill(struct i915_scheduler_queue_entry *node)
 	BUG_ON(!I915_SQS_IS_FLYING(node));
 
 	node->status = i915_sqs_dead;
+	trace_i915_scheduler_unfly(node->params.ring, node);
+	trace_i915_scheduler_node_state_change(node->params.ring, node);
 }
 
 /* Abandon a queued node completely. For example because the driver is being
@@ -365,6 +377,7 @@ static void i915_scheduler_node_kill_queued(struct i915_scheduler_queue_entry *n
 	BUG_ON(!I915_SQS_IS_QUEUED(node));
 
 	node->status = i915_sqs_dead;
+	trace_i915_scheduler_node_state_change(node->params.ring, node);
 }
 
 /* The system is toast. Terminate all nodes with extreme prejudice. */
@@ -429,8 +442,10 @@ static void i915_scheduler_seqno_complete(struct intel_engine_cs *ring, uint32_t
 	 * if a completed entry is found then there is no need to scan further.
 	 */
 	list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
-		if (I915_SQS_IS_COMPLETE(node))
+		if (I915_SQS_IS_COMPLETE(node)) {
+			trace_i915_scheduler_landing(ring, seqno, node);
 			return;
+		}
 
 		if (seqno == node->params.request->seqno)
 			break;
@@ -441,8 +456,12 @@ static void i915_scheduler_seqno_complete(struct intel_engine_cs *ring, uint32_t
 	 * like cache flushes and page flips. So don't complain about if
 	 * no node was found.
 	 */
-	if (&node->link == &scheduler->node_queue[ring->id])
+	if (&node->link == &scheduler->node_queue[ring->id]) {
+		trace_i915_scheduler_landing(ring, seqno, NULL);
 		return;
+	}
+
+	trace_i915_scheduler_landing(ring, seqno, node);
 
 	WARN_ON(!I915_SQS_IS_FLYING(node));
 
@@ -457,6 +476,7 @@ static void i915_scheduler_seqno_complete(struct intel_engine_cs *ring, uint32_t
 
 	/* Node was in flight so mark it as complete. */
 	node->status = i915_sqs_complete;
+

[Intel-gfx] [RFC 25/39] drm/i915: Defer seqno allocation until actual hardware submission time

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The seqno value is now only used for the final test for completion of a request.
It is no longer used to track the request through the software stack. Thus it is
no longer necessary to allocate the seqno immediately with the request. Instead,
it can be done lazily and left until the request is actually sent to the
hardware. This is particularly advantageous with a GPU scheduler as the requests
can then be re-ordered between their creation and their hardware submission
without having out of order seqnos.

v2: i915_add_request() can't fail!

v3: combine with 'drm/i915: Assign seqno at start of exec_final()'
Various bits of code during the execbuf code path need a seqno value to be
assigned to the request. This change makes this assignment explicit at the start
of submission_final() rather than relying on an auto-generated seqno to have
happened already. This is in preparation for a future patch which changes seqno
values to be assigned lazily (during add_request).
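
The two-stage scheme in miniature (both fragments appear in the hunks below):

	/* Request-creation time: reserve an identifier; this may fail safely */
	ret = i915_gem_get_seqno(ring->dev, &req->reserved_seqno);
	if (ret)
		goto err;

	/* Hardware-submission time: make the seqno live; this cannot fail */
	if (!request->seqno)
		request->seqno = request->reserved_seqno;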

Change-Id: I0d922b84c517611a79fa6c2b9e730d4fe3671d6a
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h|  1 +
 drivers/gpu/drm/i915/i915_gem.c| 21 -
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +
 drivers/gpu/drm/i915/intel_lrc.c   | 13 +
 4 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 30552cc..12b4986 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2201,6 +2201,7 @@ struct drm_i915_gem_request {
 
/** GEM sequence number associated with this request. */
uint32_t seqno;
+   uint32_t reserved_seqno;
 
/* Unique identifier which can be used for trace points  debug */
uint32_t uniq;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 20c696f..7308838 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2524,6 +2524,9 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 
 	/* reserve 0 for non-seqno */
 	if (dev_priv->next_seqno == 0) {
+		/* Why is the full re-initialisation required? Is it only for
+		 * hardware semaphores? If so, could skip it in the case where
+		 * semaphores are disabled? */
 		int ret = i915_gem_init_seqno(dev, 0);
 		if (ret)
 			return ret;
@@ -2581,6 +2584,12 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
 	}
 
+	/* Make the request's seqno 'live': */
+	if (!request->seqno) {
+		request->seqno = request->reserved_seqno;
+		WARN_ON(request->seqno != dev_priv->last_seqno);
+	}
+
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
@@ -2821,6 +2830,9 @@ void i915_gem_request_notify(struct intel_engine_cs *ring)
 			if (!complete)
 				continue;
 		} else {
+			/* How can this happen? */
+			WARN_ON(req->seqno == 0);
+
 			if (!i915_seqno_passed(seqno, req->seqno))
 				continue;
 		}
@@ -3009,7 +3021,14 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
 	if (req == NULL)
 		return -ENOMEM;
 
-	ret = i915_gem_get_seqno(ring->dev, &req->seqno);
+	/*
+	 * Assign an identifier to track this request through the hardware
+	 * but don't make it live yet. It could change in the future if this
+	 * request gets overtaken. However, it still needs to be allocated
+	 * in advance because the point of submission must not fail and seqno
+	 * allocation can fail.
+	 */
+	ret = i915_gem_get_seqno(ring->dev, &req->reserved_seqno);
 	if (ret)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 61a5498..1642701 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1317,6 +1317,19 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
 	/* The mutex must be acquired before calling this function */
 	BUG_ON(!mutex_is_locked(&params->dev->struct_mutex));
 
+	/* Make sure the request's seqno is the latest and greatest: */
+	if (params->request->reserved_seqno != dev_priv->last_seqno) {
+		ret = i915_gem_get_seqno(ring->dev,
+					 &params->request->reserved_seqno);
+		if

[Intel-gfx] [RFC 16/39] drm/i915: Added tracking/locking of batch buffer objects

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The scheduler needs to track interdependencies between batch buffers. These are
calculated by analysing the object lists of the buffers and looking for
commonality. The scheduler also needs to keep those buffers locked long after
the initial IOCTL call has returned to user land.
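
A hedged sketch of the commonality test this enables; the helper name is
hypothetical, but the saved_objects array is populated in the hunks below:

	/* Two queue entries depend on each other if they share any object */
	static bool i915_scheduler_shares_objects(
				struct i915_scheduler_queue_entry *a,
				struct i915_scheduler_queue_entry *b)
	{
		int i, j;

		for (i = 0; i < a->num_objs; i++)
			for (j = 0; j < b->num_objs; j++)
				if (a->saved_objects[i].obj ==
				    b->saved_objects[j].obj)
					return true;

		return false;
	}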

Change-Id: I31e3677ecfc2c9b5a908bda6acc4850432d55f1e
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 48 --
 drivers/gpu/drm/i915/i915_scheduler.c  | 33 ++--
 2 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 75d018d..61a5498 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1498,7 +1498,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct i915_execbuffer_params *params = &qe.params;
 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
 	u32 dispatch_flags;
-	int ret;
+	int ret, i;
 	bool need_relocs;
 	int fd_fence_complete = -1;
 #ifdef CONFIG_SYNC
@@ -1636,6 +1636,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto pre_mutex_err;
 	}
 
+	qe.saved_objects = kzalloc(
+			sizeof(*qe.saved_objects) * args->buffer_count,
+			GFP_KERNEL);
+	if (!qe.saved_objects) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
 	/* Look up object handles */
 	ret = eb_lookup_vmas(eb, exec, args, vm, file);
 	if (ret)
@@ -1756,7 +1764,26 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	params->args_DR1        = args->DR1;
 	params->args_DR4        = args->DR4;
 	params->batch_obj       = batch_obj;
-	params->ctx             = ctx;
+
+	/*
+	 * Save away the list of objects used by this batch buffer for the
+	 * purpose of tracking inter-buffer dependencies.
+	 */
+	for (i = 0; i < args->buffer_count; i++) {
+		/*
+		 * NB: 'drm_gem_object_lookup()' increments the object's
+		 * reference count and so must be matched by a
+		 * 'drm_gem_object_unreference' call.
+		 */
+		qe.saved_objects[i].obj =
+			to_intel_bo(drm_gem_object_lookup(dev, file,
+							  exec[i].handle));
+	}
+	qe.num_objs = i;
+
+	/* Lock and save the context object as well. */
+	i915_gem_context_reference(ctx);
+	params->ctx = ctx;
 
 #ifdef CONFIG_SYNC
 	if (args->flags & I915_EXEC_CREATE_FENCE) {
@@ -1808,6 +1835,23 @@ err:
 	i915_gem_context_unreference(ctx);
 	eb_destroy(eb);
 
+	if (qe.saved_objects) {
+		/* Need to release the objects: */
+		for (i = 0; i < qe.num_objs; i++) {
+			if (!qe.saved_objects[i].obj)
+				continue;
+
+			drm_gem_object_unreference(
+					&qe.saved_objects[i].obj->base);
+		}
+
+		kfree(qe.saved_objects);
+
+		/* Context too */
+		if (params->ctx)
+			i915_gem_context_unreference(params->ctx);
+	}
+
 	/*
 	 * If the request was created but not successfully submitted then it
 	 * must be freed again. If it was submitted then it is being tracked
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index e145829..f5fa968 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -108,7 +108,23 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 		if (ret)
 			return ret;
 
-		/* Free everything that is owned by the QE structure: */
+		/* Need to release the objects: */
+		for (i = 0; i < qe->num_objs; i++) {
+			if (!qe->saved_objects[i].obj)
+				continue;
+
+			drm_gem_object_unreference(&qe->saved_objects[i].obj->base);
+		}
+
+		kfree(qe->saved_objects);
+		qe->saved_objects = NULL;
+		qe->num_objs = 0;
+
+		/* Free the context object too: */
+		if (qe->params.ctx)
+			i915_gem_context_unreference(qe->params.ctx);
+
+		/* And anything else owned by the QE structure: */
 		kfree(qe->params.cliprects);
 		if (qe->params.dispatch_flags & I915_DISPATCH_SECURE)
 			i915_gem_execbuff_release_batch_obj(qe->params.batch_obj);
@@ -425,7 +441,7 @@ static 

[Intel-gfx] [RFC 09/39] drm/i915: Added scheduler hook into i915_gem_complete_requests_ring()

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The GPU scheduler can cause requests to complete out of order. For example,
because one request pre-empted others that had already been submitted. This
means the simple seqno comparison is not necessarily valid. Instead, a check
against what the scheduler is currently doing must be made to determine if a
request has really completed.
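
The shape of the new test inside the signal loop, extracted from the hunk below:

	/* Ask the scheduler first; fall back to the raw seqno comparison */
	if (i915_scheduler_is_request_tracked(req, &complete, NULL)) {
		if (!complete)
			continue;	/* still queued, flying or pre-empted */
	} else {
		if (!i915_seqno_passed(seqno, req->seqno))
			continue;
	}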

Change-Id: I149250a8f9382586514ca324aba1c53063b83e19
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h   |  2 ++
 drivers/gpu/drm/i915/i915_gem.c   | 13 +++--
 drivers/gpu/drm/i915/i915_scheduler.c | 31 +++
 drivers/gpu/drm/i915/i915_scheduler.h |  2 ++
 4 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7d2a494..58f53ec 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2238,6 +2238,8 @@ struct drm_i915_gem_request {
 	/** process identifier submitting this request */
 	struct pid *pid;
 
+	struct i915_scheduler_queue_entry	*scheduler_qe;
+
 	/**
 	 * The ELSP only accepts two elements at a time, so we queue
 	 * context/tail pairs on a given queue (ring->execlist_queue) until the
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 56405cd..e3c4032 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2772,6 +2772,7 @@ void i915_gem_request_notify(struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_request *req, *req_next;
 	unsigned long flags;
+	bool complete;
 	u32 seqno;
 	LIST_HEAD(free_list);
 
@@ -2785,8 +2786,13 @@ void i915_gem_request_notify(struct intel_engine_cs *ring)
 	spin_lock_irqsave(&ring->fence_lock, flags);
 	list_for_each_entry_safe(req, req_next, &ring->fence_signal_list, signal_list) {
 		if (!req->cancelled) {
-			if (!i915_seqno_passed(seqno, req->seqno))
-				continue;
+			if (i915_scheduler_is_request_tracked(req, &complete, NULL)) {
+				if (!complete)
+					continue;
+			} else {
+				if (!i915_seqno_passed(seqno, req->seqno))
+					continue;
+			}
 
 			fence_signal_locked(&req->fence);
 			trace_i915_gem_request_complete(req);
@@ -2811,6 +2817,9 @@ void i915_gem_request_notify(struct intel_engine_cs *ring)
 
 		i915_gem_request_unreference(req);
 	}
+
+	/* Necessary? Or does the fence_signal() call do an implicit wakeup? */
+	wake_up_all(&ring->irq_queue);
 }
 
 static void i915_fence_timeline_value_str(struct fence *fence, char *str, int size)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 71d8df7..0d1cbe3 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -119,6 +119,9 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 	node->stamp  = stamp;
 	i915_gem_request_reference(node->params.request);
 
+	BUG_ON(node->params.request->scheduler_qe);
+	node->params.request->scheduler_qe = node;
+
 	/* Need to determine the number of incomplete entries in the list as
 	 * that will be the maximum size of the dependency list.
 	 *
@@ -363,6 +366,13 @@ static void i915_scheduler_seqno_complete(struct intel_engine_cs *ring, uint32_t
 		got_changes = true;
 	}
 
+	/*
+	 * Avoid issues with requests not being signalled because their
+	 * interrupt has already passed.
+	 */
+	if (got_changes)
+		i915_gem_request_notify(ring);
+
 	/* Should submit new work here if flight list is empty but the DRM
 	 * mutex lock might not be available if a '__wait_request()' call is
 	 * blocking the system. */
@@ -504,6 +514,7 @@ int i915_scheduler_remove(struct intel_engine_cs *ring)
 			i915_gem_execbuff_release_batch_obj(node->params.batch_obj);
 
 			/* Free everything that is owned by the node: */
+			node->params.request->scheduler_qe = NULL;
 			i915_gem_request_unreference(node->params.request);
 			kfree(node->params.cliprects);
 			kfree(node->dep_list);
@@ -774,3 +785,23 @@ static int i915_scheduler_remove_dependent(struct i915_scheduler *scheduler,
 
 	return 0;
 }
+
+bool i915_scheduler_is_request_tracked(struct drm_i915_gem_request *req,
+				       bool *completed, bool *busy)
+{
+	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
+	struct i915_scheduler   *scheduler =

[Intel-gfx] [RFC 18/39] drm/i915: Added scheduler interrupt handler hook

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The scheduler needs to be informed of each batch buffer completion. This is done
via the user interrupt mechanism. The epilogue of each batch buffer submission
updates a sequence number value (seqno) and triggers a user interrupt.

This change hooks the scheduler in to the processing of that interrupt via the
notify_ring() function. The scheduler also has clean up code that needs to be
done outside of the interrupt context, thus notify_ring() now also pokes the
scheduler's work queue.
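
The work queue poke is not visible in the hunk below; a hedged sketch of what
it might look like, with a hypothetical work item name:

	/* Interrupt context: cheap bookkeeping only, defer the rest */
	struct drm_i915_private *dev_priv = ring->dev->dev_private;

	i915_scheduler_handle_irq(ring);

	/* Hypothetical: kick the scheduler's process-context handler */
	queue_work(dev_priv->wq, &dev_priv->mm.scheduler_work);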

Change-Id: I4724b3ad7782453a244f84744d54bf14f5b65a38
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_irq.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f67d09b..40a2eff 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -36,6 +36,7 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "i915_scheduler.h"
 
 /**
  * DOC: interrupt handling
@@ -851,6 +852,8 @@ static void notify_ring(struct intel_engine_cs *ring)
 	if (!intel_ring_initialized(ring))
 		return;
 
+	i915_scheduler_handle_irq(ring);
+
 	i915_gem_request_notify(ring);
 
 	wake_up_all(&ring->irq_queue);
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 35/39] drm/i915: Added seqno values to scheduler status dump

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

It is useful to be able to see what seqnos have actually popped out of the
hardware when viewing the scheduler status.

Change-Id: Ie93e51c64328be2606b8b43440f6344d5f225426
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_scheduler.c | 10 ++
 drivers/gpu/drm/i915/i915_scheduler.h |  1 +
 2 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 1547b64..7be1c89 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -152,6 +152,7 @@ const char *i915_scheduler_flag_str(uint32_t flags)
 	TEST_FLAG(i915_sf_dump_force,         "DumpForce|");
 	TEST_FLAG(i915_sf_dump_details,       "DumpDetails|");
 	TEST_FLAG(i915_sf_dump_dependencies,  "DumpDeps|");
+	TEST_FLAG(i915_sf_dump_seqno,         "DumpSeqno|");
 
 #undef TEST_FLAG
 
@@ -861,6 +862,7 @@ static int i915_scheduler_dump_all_locked(struct drm_device *dev, const char *ms
 	for_each_ring(ring, dev_priv, i) {
 		scheduler->flags[ring->id] |= i915_sf_dump_force   |
 					      i915_sf_dump_details |
+					      i915_sf_dump_seqno   |
 					      i915_sf_dump_dependencies;
 		r = i915_scheduler_dump_locked(ring, msg);
 		if (ret == 0)
@@ -942,6 +944,14 @@ static int i915_scheduler_dump_locked(struct intel_engine_cs *ring, const char *
 		return 0;
 	}
 
+	if (scheduler->flags[ring->id] & i915_sf_dump_seqno) {
+		uint32_t    seqno;
+
+		seqno = ring->get_seqno(ring, true);
+
+		DRM_DEBUG_DRIVER("%s Seqno = %d\n", ring->name, seqno);
+	}
+
 	if (scheduler->flags[ring->id] & i915_sf_dump_details) {
 		int i, deps;
 		uint32_t    count, counts[i915_sqs_MAX];
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index dd0510c..6e6e3a0 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -137,6 +137,7 @@ enum {
 	i915_sf_dump_force          = (1 << 8),
 	i915_sf_dump_details        = (1 << 9),
 	i915_sf_dump_dependencies   = (1 << 10),
+	i915_sf_dump_seqno          = (1 << 11),
 };
 const char *i915_scheduler_flag_str(uint32_t flags);
 
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 14/39] drm/i915: Redirect execbuffer_final() via scheduler

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

Updated the execbuffer() code to pass the packaged up batch buffer information
to the scheduler rather than calling execbuffer_final() directly. The scheduler
queue() code is currently a stub which simply chains on to _final() immediately.
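
A sketch of the interim stub, grounded in the execbuf_final vtable entry that
this patch stops calling directly:

	int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
	{
		struct drm_i915_private *dev_priv = qe->params.dev->dev_private;

		/* No re-ordering yet: submit to the hardware immediately */
		return dev_priv->gt.execbuf_final(&qe->params);
	}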

Change-Id: I2a19062a9e66845f2e886332fc4b5fc7ac992864
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 +++
 drivers/gpu/drm/i915/intel_lrc.c   | 12 
 2 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ba9d595..364e9cc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -37,6 +37,7 @@
 #ifdef CONFIG_SYNC
 #include "../drivers/staging/android/sync.h"
 #endif
+#include "i915_scheduler.h"
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
@@ -1198,6 +1199,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
 			       struct drm_i915_gem_execbuffer2 *args,
 			       struct list_head *vmas)
 {
+	struct i915_scheduler_queue_entry *qe;
 	struct drm_device *dev = params->dev;
 	struct intel_engine_cs *ring = params->ring;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1289,18 +1291,11 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
 
 	i915_gem_execbuffer_move_to_active(vmas, params->request);
 
-	ret = dev_priv->gt.execbuf_final(params);
+	qe = container_of(params, typeof(*qe), params);
+	ret = i915_scheduler_queue_execbuffer(qe);
 	if (ret)
 		goto error;
 
-	/*
-	 * Free everything that was stored in the QE structure (until the
-	 * scheduler arrives and does it instead):
-	 */
-	kfree(params->cliprects);
-	if (params->dispatch_flags & I915_DISPATCH_SECURE)
-		i915_gem_execbuff_release_batch_obj(params->batch_obj);
-
 	return 0;
 
 error:
@@ -1492,8 +1487,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct intel_engine_cs *ring;
 	struct intel_context *ctx;
 	struct i915_address_space *vm;
-	struct i915_execbuffer_params params_master; /* XXX: will be removed later */
-	struct i915_execbuffer_params *params = &params_master;
+	struct i915_scheduler_queue_entry qe;
+	struct i915_execbuffer_params *params = &qe.params;
 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
 	u32 dispatch_flags;
 	int ret;
@@ -1624,7 +1619,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	else
 		vm = &dev_priv->gtt.base;
 
-	memset(&params_master, 0x00, sizeof(params_master));
+	memset(&qe, 0x00, sizeof(qe));
 
 	eb = eb_create(args);
 	if (eb == NULL) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index bba1152..a8c78ec 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -136,6 +136,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "intel_mocs.h"
+#include "i915_scheduler.h"
 
 #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
 #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
@@ -827,6 +828,7 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
 			       struct drm_i915_gem_execbuffer2 *args,
 			       struct list_head *vmas)
 {
+	struct i915_scheduler_queue_entry *qe;
 	struct drm_device       *dev = params->dev;
 	struct intel_engine_cs  *ring = params->ring;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -884,17 +886,11 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
 
 	i915_gem_execbuffer_move_to_active(vmas, params->request);
 
-	ret = dev_priv->gt.execbuf_final(params);
+	qe = container_of(params, typeof(*qe), params);
+	ret = i915_scheduler_queue_execbuffer(qe);
 	if (ret)
 		return ret;
 
-	/*
-	 * Free everything that was stored in the QE structure (until the
-	 * scheduler arrives and does it instead):
-	 */
-	if (params->dispatch_flags & I915_DISPATCH_SECURE)
-		i915_gem_execbuff_release_batch_obj(params->batch_obj);
-
 	return 0;
 }
 
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 38/39] drm/i915: Enable GPU scheduler by default

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

Now that all the scheduler patches have been applied, it is safe to enable.

Change-Id: I128042e85a30fca765ce1eb46c837c62dee66089
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_params.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index a5320ff..4656518 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -53,7 +53,7 @@ struct i915_params i915 __read_mostly = {
.verbose_state_checks = 1,
.nuclear_pageflip = 0,
.edp_vswing = 0,
-   .scheduler_override = 1,
+   .scheduler_override = 0,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -189,4 +189,4 @@ MODULE_PARM_DESC(edp_vswing,
 		 "2=default swing(400mV))");
 
 module_param_named(scheduler_override, i915.scheduler_override, int, 0600);
-MODULE_PARM_DESC(scheduler_override, "Scheduler override mask (0 = none, 1 = direct submission [default])");
+MODULE_PARM_DESC(scheduler_override, "Scheduler override mask (default: 0)");
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 39/39] drm/i915: Allow scheduler to manage inter-ring object synchronisation

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The scheduler has always tracked batch buffer dependencies based on
DRM object usage. This means that it will not submit a batch on one
ring that has outstanding dependencies still executing on other rings.
This is exactly the same synchronisation performed by
i915_gem_object_sync() using hardware semaphores where available and
CPU stalls where not (e.g. in execlist mode and/or on Gen8 hardware).

Unfortunately, when a batch buffer is submitted to the driver the
_object_sync() call happens first. Thus, in the case where hardware
semaphores are disabled, the driver will already have stalled until the
dependency has been resolved.

This patch adds an optimisation to _object_sync() to ignore the
synchronisation in the case where it will subsequently be handled by
the scheduler. This removes the driver stall and (in the single
application case) provides near hardware semaphore performance even
when hardware semaphores are disabled. In a busy system where there is
other work that can be executed on the stalling ring, it provides
better than hardware semaphore performance as it removes the stall
from both the driver and from the hardware. There is also a theory
that this method should improve power usage as hardware semaphores are
apparently not very power efficient - the stalled ring does not go
into as low a power state as when it is genuinely idle.

The optimisation is to check whether both ends of the synchronisation
are batch buffer requests. If they are, then the scheduler will have
the inter-dependency tracked and managed. If one or other end is not a
batch buffer request (e.g. a page flip) then the code falls back to
the CPU stall or hardware semaphore as appropriate.

To check whether the existing usage is a batch buffer, the code simply
calls the 'are you tracking this request' function of the scheduler on
the object's last_read_req member. To check whether the new usage is a
batch buffer, a flag is passed in from the caller.
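
The call-site side of this is tiny. A hedged sketch only (obj/ring/req stand
in for the callers' locals; the real hunks are in the execbuffer and lrc
files listed below):

    /* batch buffer submission: the scheduler may elide the stall */
    ret = i915_gem_object_sync(obj, ring, &req, true);

    /* non-batch user, e.g. a page flip: stall/semaphore as before */
    ret = i915_gem_object_sync(obj, ring, &req, false);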

Change-Id: Idc16e19b5a4dc8b3782ce9db44dd3df445f396c1
Issue: VIZ-5566
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h|  2 +-
 drivers/gpu/drm/i915/i915_gem.c| 19 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c   |  2 +-
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e230632..e4bef2c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2895,7 +2895,7 @@ int __must_check i915_mutex_lock_interruptible(struct 
drm_device *dev);
 #endif
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
 struct intel_engine_cs *to,
-struct drm_i915_gem_request **to_req);
+struct drm_i915_gem_request **to_req, bool to_batch);
 void i915_vma_move_to_active(struct i915_vma *vma,
 struct drm_i915_gem_request *req);
 int i915_gem_dumb_create(struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7308838..e0dca8c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3507,7 +3507,7 @@ static int
 __i915_gem_object_sync(struct drm_i915_gem_object *obj,
   struct intel_engine_cs *to,
   struct drm_i915_gem_request *from_req,
-  struct drm_i915_gem_request **to_req)
+  struct drm_i915_gem_request **to_req, bool to_batch)
 {
struct intel_engine_cs *from;
int ret;
@@ -3519,6 +3519,15 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
if (i915_gem_request_completed(from_req))
return 0;
 
+   /*
+* The scheduler will manage inter-ring object dependencies
+* as long as both to and from requests are scheduler managed
+* (i.e. batch buffers).
+*/
+   if (to_batch &&
+   i915_scheduler_is_request_tracked(from_req, NULL, NULL))
+   return 0;
+
	if (!i915_semaphore_is_enabled(obj->base.dev)) {
		struct drm_i915_private *i915 = to_i915(obj->base.dev);
ret = __i915_wait_request(from_req,
@@ -3569,6 +3578,8 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
  * @to_req: request we wish to use the object for. See below.
  *  This will be allocated and returned if a request is
  *  required but not passed in.
+ * @to_batch: is the sync request on behalf of batch buffer submission?
+ * If so then the scheduler can (potentially) manage the synchronisation.
  *
  * This code is meant to abstract object synchronization with the GPU.
  * Calling with NULL implies synchronizing the object with the CPU
@@ -3599,7 +3610,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 int
 

[Intel-gfx] [RFC 31/39] drm/i915: Added debugfs interface to scheduler tuning parameters

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

There are various parameters within the scheduler which can be tuned to improve
performance, reduce memory footprint, etc. This change adds support for altering
these via debugfs.
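
As an aside, the files can be driven from a trivial userspace helper (a
minimal sketch, assuming debugfs is mounted at /sys/kernel/debug and the
device is DRI minor 0; the file name matches the table added below):

    #include <stdio.h>

    int main(void)
    {
        const char *path =
            "/sys/kernel/debug/dri/0/i915_scheduler_min_flying";
        FILE *f = fopen(path, "r+");
        unsigned long long val;

        if (!f)
            return 1;
        if (fscanf(f, "%llx", &val) != 1) {  /* the fops print 0x%llx */
            fclose(f);
            return 1;
        }
        printf("min_flying = %llu\n", val);
        rewind(f);
        fprintf(f, "%llu", val + 1);         /* write back a new value */
        fclose(f);
        return 0;
    }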

Change-Id: I6c26765269ae7173ff4d3a5c20921eaaca7c36ed
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c | 113 
 1 file changed, 113 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 05646fe..028fa8f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -39,6 +39,7 @@
 #include intel_ringbuffer.h
 #include drm/i915_drm.h
 #include i915_drv.h
+#include i915_scheduler.h
 
 enum {
ACTIVE_LIST,
@@ -1123,6 +1124,114 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
i915_next_seqno_get, i915_next_seqno_set,
"0x%llx\n");
 
+static int
+i915_scheduler_priority_max_get(void *data, u64 *val)
+{
+   struct drm_device   *dev   = data;
+   struct drm_i915_private *dev_priv  = dev->dev_private;
+   struct i915_scheduler   *scheduler = dev_priv->scheduler;
+
+   *val = (u64) scheduler->priority_level_max;
+   return 0;
+}
+
+static int
+i915_scheduler_priority_max_set(void *data, u64 val)
+{
+   struct drm_device   *dev   = data;
+   struct drm_i915_private *dev_priv  = dev->dev_private;
+   struct i915_scheduler   *scheduler = dev_priv->scheduler;
+
+   scheduler->priority_level_max = (u32) val;
+   return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_scheduler_priority_max_fops,
+   i915_scheduler_priority_max_get,
+   i915_scheduler_priority_max_set,
+   "0x%llx\n");
+
+static int
+i915_scheduler_priority_preempt_get(void *data, u64 *val)
+{
+   struct drm_device   *dev   = data;
+   struct drm_i915_private *dev_priv  = dev->dev_private;
+   struct i915_scheduler   *scheduler = dev_priv->scheduler;
+
+   *val = (u64) scheduler->priority_level_preempt;
+   return 0;
+}
+
+static int
+i915_scheduler_priority_preempt_set(void *data, u64 val)
+{
+   struct drm_device   *dev   = data;
+   struct drm_i915_private *dev_priv  = dev->dev_private;
+   struct i915_scheduler   *scheduler = dev_priv->scheduler;
+
+   scheduler->priority_level_preempt = (u32) val;
+   return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_scheduler_priority_preempt_fops,
+   i915_scheduler_priority_preempt_get,
+   i915_scheduler_priority_preempt_set,
+   "0x%llx\n");
+
+static int
+i915_scheduler_min_flying_get(void *data, u64 *val)
+{
+   struct drm_device   *dev   = data;
+   struct drm_i915_private *dev_priv  = dev->dev_private;
+   struct i915_scheduler   *scheduler = dev_priv->scheduler;
+
+   *val = (u64) scheduler->min_flying;
+   return 0;
+}
+
+static int
+i915_scheduler_min_flying_set(void *data, u64 val)
+{
+   struct drm_device   *dev   = data;
+   struct drm_i915_private *dev_priv  = dev->dev_private;
+   struct i915_scheduler   *scheduler = dev_priv->scheduler;
+
+   scheduler->min_flying = (u32) val;
+   return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_scheduler_min_flying_fops,
+   i915_scheduler_min_flying_get,
+   i915_scheduler_min_flying_set,
+   "0x%llx\n");
+
+static int
+i915_scheduler_file_queue_max_get(void *data, u64 *val)
+{
+   struct drm_device   *dev   = data;
+   struct drm_i915_private *dev_priv  = dev->dev_private;
+   struct i915_scheduler   *scheduler = dev_priv->scheduler;
+
+   *val = (u64) scheduler->file_queue_max;
+   return 0;
+}
+
+static int
+i915_scheduler_file_queue_max_set(void *data, u64 val)
+{
+   struct drm_device   *dev   = data;
+   struct drm_i915_private *dev_priv  = dev->dev_private;
+   struct i915_scheduler   *scheduler = dev_priv->scheduler;
+
+   scheduler->file_queue_max = (u32) val;
+   return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_scheduler_file_queue_max_fops,
+   i915_scheduler_file_queue_max_get,
+   i915_scheduler_file_queue_max_set,
+   "0x%llx\n");
+
 static int i915_frequency_info(struct seq_file *m, void *unused)
 {
	struct drm_info_node *node = m->private;
@@ -5163,6 +5272,10 @@ static const struct i915_debugfs_files {
	{"i915_gem_drop_caches", &i915_drop_caches_fops},
	{"i915_error_state", &i915_error_state_fops},
	{"i915_next_seqno", &i915_next_seqno_fops},
+	{"i915_scheduler_priority_max", &i915_scheduler_priority_max_fops},
+	{"i915_scheduler_priority_preempt",
+	 &i915_scheduler_priority_preempt_fops},
+	{"i915_scheduler_min_flying", &i915_scheduler_min_flying_fops},
+   

[Intel-gfx] [RFC 4/9] drm/i915: Removed now redundant parameter to i915_gem_request_completed()

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The change to the implementation of i915_gem_request_completed() means that the
lazy coherency flag is no longer used. This can now be removed to simplify the
interface.

For: VIZ-5190
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  2 +-
 drivers/gpu/drm/i915/i915_drv.h  |  3 +--
 drivers/gpu/drm/i915/i915_gem.c  | 18 +-
 drivers/gpu/drm/i915/intel_display.c |  2 +-
 drivers/gpu/drm/i915/intel_pm.c  |  4 ++--
 5 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index bc817da..b9a92fe 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -602,7 +602,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
		   i915_gem_request_get_seqno(work->flip_queued_req),
		   dev_priv->next_seqno,
		   ring->get_seqno(ring, true),
-		   i915_gem_request_completed(work->flip_queued_req, true));
+		   i915_gem_request_completed(work->flip_queued_req));
		} else
			seq_printf(m, "Flip not associated with any ring\n");
		seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 79d346c..0c7df46 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2239,8 +2239,7 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
   struct drm_i915_gem_request **req_out);
 void i915_gem_request_cancel(struct drm_i915_gem_request *req);
 
-static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
- bool lazy_coherency)
+static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
 {
	return fence_is_signaled(&req->fence);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 888bb72..3970250 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1170,7 +1170,7 @@ static int __i915_spin_request(struct 
drm_i915_gem_request *req)
 
timeout = jiffies + 1;
while (!need_resched()) {
-   if (i915_gem_request_completed(req, true))
+   if (i915_gem_request_completed(req))
return 0;
 
if (time_after_eq(jiffies, timeout))
@@ -1178,7 +1178,7 @@ static int __i915_spin_request(struct 
drm_i915_gem_request *req)
 
cpu_relax_lowlatency();
}
-   if (i915_gem_request_completed(req, false))
+   if (i915_gem_request_completed(req))
return 0;
 
return -EAGAIN;
@@ -1222,7 +1222,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
	if (list_empty(&req->list))
return 0;
 
-   if (i915_gem_request_completed(req, true))
+   if (i915_gem_request_completed(req))
return 0;
 
timeout_expire = timeout ?
@@ -1262,7 +1262,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
break;
}
 
-   if (i915_gem_request_completed(req, false)) {
+   if (i915_gem_request_completed(req)) {
ret = 0;
break;
}
@@ -2759,7 +2759,7 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
struct drm_i915_gem_request *request;
 
	list_for_each_entry(request, &ring->request_list, list) {
-   if (i915_gem_request_completed(request, false))
+   if (i915_gem_request_completed(request))
continue;
 
return request;
@@ -2902,7 +2902,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
*ring)
   struct drm_i915_gem_request,
   list);
 
-   if (!i915_gem_request_completed(request, true))
+   if (!i915_gem_request_completed(request))
break;
 
i915_gem_request_retire(request);
@@ -2926,7 +2926,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
*ring)
}
 
	if (unlikely(ring->trace_irq_req &&
-		     i915_gem_request_completed(ring->trace_irq_req, true))) {
+		     i915_gem_request_completed(ring->trace_irq_req))) {
		ring->irq_put(ring);
		i915_gem_request_assign(&ring->trace_irq_req, NULL);
}
@@ -3032,7 +3032,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object 
*obj)
	if (list_empty(&req->list))
  

[Intel-gfx] [RFC 2/9] android: add sync_fence_create_dma

2015-07-17 Thread John . C . Harrison
From: Maarten Lankhorst maarten.lankho...@canonical.com

This allows users of dma fences to create a android fence.

v2: Added kerneldoc. (Tvrtko Ursulin).
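
A minimal usage sketch (hypothetical driver code, not part of this patch):
wrap an initialised struct fence in an Android sync_fence and hand it to
userspace as a file descriptor. 'my_fence' is a placeholder:

    struct sync_fence *sf;
    int fd;

    fd = get_unused_fd_flags(O_CLOEXEC);
    if (fd < 0)
        return fd;

    /* takes a reference on my_fence rather than ownership */
    sf = sync_fence_create_dma("driver-fence", my_fence);
    if (!sf) {
        put_unused_fd(fd);
        return -ENOMEM;
    }

    sync_fence_install(sf, fd);  /* the fd now owns the sync_fence */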

Signed-off-by: Maarten Lankhorst maarten.lankho...@canonical.com
Signed-off-by: Tvrtko Ursulin tvrtko.ursu...@intel.com
Cc: Maarten Lankhorst maarten.lankho...@linux.intel.com
Cc: Daniel Vetter dan...@ffwll.ch
Cc: Jesse Barnes jbar...@virtuousgeek.org
Cc: de...@driverdev.osuosl.org
Cc: Riley Andrews riandr...@android.com
Cc: Greg Kroah-Hartman gre...@linuxfoundation.org
Cc: Arve Hjønnevåg a...@android.com
---
 drivers/staging/android/sync.c | 13 +
 drivers/staging/android/sync.h | 12 +++-
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/android/sync.c b/drivers/staging/android/sync.c
index f83e00c..7f0e919 100644
--- a/drivers/staging/android/sync.c
+++ b/drivers/staging/android/sync.c
@@ -188,7 +188,7 @@ static void fence_check_cb_func(struct fence *f, struct 
fence_cb *cb)
 }
 
 /* TODO: implement a create which takes more than one sync_pt */
-struct sync_fence *sync_fence_create(const char *name, struct sync_pt *pt)
+struct sync_fence *sync_fence_create_dma(const char *name, struct fence *pt)
 {
struct sync_fence *fence;
 
@@ -199,16 +199,21 @@ struct sync_fence *sync_fence_create(const char *name, 
struct sync_pt *pt)
	fence->num_fences = 1;
	atomic_set(&fence->status, 1);
 
-	fence->cbs[0].sync_pt = &pt->base;
+	fence->cbs[0].sync_pt = pt;
	fence->cbs[0].fence = fence;
-	if (fence_add_callback(&pt->base, &fence->cbs[0].cb,
-			       fence_check_cb_func))
+	if (fence_add_callback(pt, &fence->cbs[0].cb, fence_check_cb_func))
		atomic_dec(&fence->status);
 
sync_fence_debug_add(fence);
 
return fence;
 }
+EXPORT_SYMBOL(sync_fence_create_dma);
+
+struct sync_fence *sync_fence_create(const char *name, struct sync_pt *pt)
+{
+   return sync_fence_create_dma(name, &pt->base);
+}
 EXPORT_SYMBOL(sync_fence_create);
 
 struct sync_fence *sync_fence_fdget(int fd)
diff --git a/drivers/staging/android/sync.h b/drivers/staging/android/sync.h
index a21b79f..0f1299e 100644
--- a/drivers/staging/android/sync.h
+++ b/drivers/staging/android/sync.h
@@ -250,10 +250,20 @@ void sync_pt_free(struct sync_pt *pt);
 * @pt: sync_pt to add to the fence
 *
 * Creates a fence containing @pt.  Once this is called, the fence takes
- * ownership of @pt.
+ * a reference on @pt.
  */
 struct sync_fence *sync_fence_create(const char *name, struct sync_pt *pt);
 
+/**
+ * sync_fence_create_dma() - creates a sync fence from dma-fence
+ * @name:  name of fence to create
+ * @pt: dma-fence to add to the fence
+ *
+ * Creates a fence containing @pt.  Once this is called, the fence takes
+ * a reference on @pt.
+ */
+struct sync_fence *sync_fence_create_dma(const char *name, struct fence *pt);
+
 /*
  * API for sync_fence consumers
  */
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 1/9] staging/android/sync: Support sync points created from dma-fences

2015-07-17 Thread John . C . Harrison
From: Tvrtko Ursulin tvrtko.ursu...@intel.com

Debug output assumes all sync points are built on top of Android sync points,
and when we start creating them from dma-fences it will NULL ptr deref unless
taught about this.

Signed-off-by: Tvrtko Ursulin tvrtko.ursu...@intel.com
Cc: Maarten Lankhorst maarten.lankho...@linux.intel.com
Cc: de...@driverdev.osuosl.org
Cc: Riley Andrews riandr...@android.com
Cc: Greg Kroah-Hartman gre...@linuxfoundation.org
Cc: Arve Hjønnevåg a...@android.com
---
 drivers/staging/android/sync_debug.c | 42 +++-
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/staging/android/sync_debug.c 
b/drivers/staging/android/sync_debug.c
index 91ed2c4..f45d13c 100644
--- a/drivers/staging/android/sync_debug.c
+++ b/drivers/staging/android/sync_debug.c
@@ -82,36 +82,42 @@ static const char *sync_status_str(int status)
	return "error";
 }
 
-static void sync_print_pt(struct seq_file *s, struct sync_pt *pt, bool fence)
+static void sync_print_pt(struct seq_file *s, struct fence *pt, bool fence)
 {
int status = 1;
-	struct sync_timeline *parent = sync_pt_parent(pt);
 
-	if (fence_is_signaled_locked(&pt->base))
-		status = pt->base.status;
+	if (fence_is_signaled_locked(pt))
+		status = pt->status;
 
	seq_printf(s, "  %s%spt %s",
-		   fence ? parent->name : "",
+		   fence && pt->ops->get_timeline_name ?
+		   pt->ops->get_timeline_name(pt) : "",
		   fence ? "_" : "",
		   sync_status_str(status));
 
	if (status <= 0) {
		struct timespec64 ts64 =
-			ktime_to_timespec64(pt->base.timestamp);
+			ktime_to_timespec64(pt->timestamp);
 
		seq_printf(s, "@%lld.%09ld", (s64)ts64.tv_sec, ts64.tv_nsec);
}
 
-	if (parent->ops->timeline_value_str &&
-	    parent->ops->pt_value_str) {
+	if ((!fence || pt->ops->timeline_value_str) &&
+	    pt->ops->fence_value_str) {
char value[64];
+   bool success;
 
-		parent->ops->pt_value_str(pt, value, sizeof(value));
-		seq_printf(s, ": %s", value);
-		if (fence) {
-			parent->ops->timeline_value_str(parent, value,
-							sizeof(value));
-			seq_printf(s, " / %s", value);
+		pt->ops->fence_value_str(pt, value, sizeof(value));
+		success = strlen(value);
+
+		if (success)
+			seq_printf(s, ": %s", value);
+
+		if (success && fence) {
+			pt->ops->timeline_value_str(pt, value, sizeof(value));
+
+			if (strlen(value))
+				seq_printf(s, " / %s", value);
}
}
 
@@ -138,7 +144,7 @@ static void sync_print_obj(struct seq_file *s, struct 
sync_timeline *obj)
	list_for_each(pos, &obj->child_list_head) {
struct sync_pt *pt =
container_of(pos, struct sync_pt, child_list);
-   sync_print_pt(s, pt, false);
+   sync_print_pt(s, &pt->base, false);
}
	spin_unlock_irqrestore(&obj->child_list_lock, flags);
 }
@@ -153,11 +159,7 @@ static void sync_print_fence(struct seq_file *s, struct 
sync_fence *fence)
		   sync_status_str(atomic_read(&fence->status)));
 
	for (i = 0; i < fence->num_fences; ++i) {
-		struct sync_pt *pt =
-			container_of(fence->cbs[i].sync_pt,
-				     struct sync_pt, base);
-
-		sync_print_pt(s, pt, true);
+		sync_print_pt(s, fence->cbs[i].sync_pt, true);
}
 
	spin_lock_irqsave(&fence->wq.lock, flags);
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC 1/9] staging/android/sync: Support sync points created from dma-fences

2015-07-17 Thread Tvrtko Ursulin


On 07/17/2015 03:31 PM, john.c.harri...@intel.com wrote:

From: Tvrtko Ursulin tvrtko.ursu...@intel.com

Debug output assumes all sync points are built on top of Android sync points,
and when we start creating them from dma-fences it will NULL ptr deref unless
taught about this.


This is Maarten's code, just the patch had a troubled history where it 
got misplaced, forgotten and then resurrected but with the commit 
message lost.


Tvrtko
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] igt/kms_plane_scaling : plane scaling enhancement and plane support for bxt

2015-07-17 Thread Thomas Wood
On 17 July 2015 at 09:34, Nabendu Maiti nabendu.bikash.ma...@intel.com wrote:
 1) Added more overlay plane support for BXT.
 2) Added and enhanced scaler test cases for additional planes.

Please split the various separate changes in different patches.

The i-g-t tag should also appear in the subject prefix rather than the
subject tag (i.e. use --subject-prefix="PATCH i-g-t" or git config
format.subjectprefix "PATCH i-g-t").



 Signed-off-by: Nabendu Maiti nabendu.bikash.ma...@intel.com
 ---
  lib/igt_fb.c  | 40 
  lib/igt_fb.h  |  1 +
  lib/igt_kms.c |  1 +
  lib/igt_kms.h |  3 +-
  tests/kms_plane_scaling.c | 79 
 ---
  5 files changed, 118 insertions(+), 6 deletions(-)

 diff --git a/lib/igt_fb.c b/lib/igt_fb.c
 index 134dbd2..e61b762 100644
 --- a/lib/igt_fb.c
 +++ b/lib/igt_fb.c
 @@ -393,6 +393,46 @@ void igt_paint_image(cairo_t *cr, const char *filename,
  }

  /**
 + * igt_paint_cross_ruler:
 + * @cr: cairo drawing context
 + * @dst_width: width of the horizontal ruler
 + * @dst_height: height of the vertical ruler
 + *
 + * This function can be used to draw a cross ruler on a frame buffer.
 + */
 +void
 +igt_paint_cross_ruler(cairo_t *cr, uint16_t w, uint16_t h)

Other cairo functions use int rather than uint16_t for width and
height, so int should be sufficient here too, at least for
consistency.


 +{
 +
 +   uint16_t i;
 +
 +   /* Paint corner markers */
 +   paint_marker(cr, 0, 0);
 +   paint_marker(cr, w, 0);
 +   paint_marker(cr, 0, h);
 +   paint_marker(cr, w, h);
 +
 +
 +   cairo_move_to(cr, w/2, 0);
 +   cairo_line_to(cr, w/2, h);
 +
 +   cairo_set_source_rgb(cr, 4, 1, 10);
 +   cairo_move_to(cr, 0, h/2);
 +   cairo_line_to(cr, w, h/2 );
 +   cairo_stroke(cr);
 +
 +   cairo_set_source_rgb(cr, 2, 5, 1);
 +   cairo_set_line_width(cr, 2);
 +   cairo_stroke(cr);
 +   cairo_stroke_preserve(cr);
 +   for (i = 0; i < w; i += 200)
 +   paint_marker(cr, i, h/2);
 +
 +   for (i = 0; i < h; i += 200)
 +   paint_marker(cr, w/2, i);
 +}
 +
 +/**
   * igt_create_fb_with_bo_size:
   * @fd: open i915 drm file descriptor
   * @width: width of the framebuffer in pixel
 diff --git a/lib/igt_fb.h b/lib/igt_fb.h
 index a07acd2..cf6e7e3 100644
 --- a/lib/igt_fb.h
 +++ b/lib/igt_fb.h
 @@ -98,6 +98,7 @@ void igt_write_fb_to_png(int fd, struct igt_fb *fb, const 
 char *filename);
  int igt_cairo_printf_line(cairo_t *cr, enum igt_text_align align,
double yspacing, const char *fmt, ...)
__attribute__((format (printf, 4, 5)));
 +void igt_paint_cross_ruler(cairo_t *cr, uint16_t w, uint16_t h);

  /* helpers to handle drm fourcc codes */
  uint32_t igt_bpp_depth_to_drm_format(int bpp, int depth);
 diff --git a/lib/igt_kms.c b/lib/igt_kms.c
 index 0bb16b4..781ffa5 100644
 --- a/lib/igt_kms.c
 +++ b/lib/igt_kms.c
 @@ -213,6 +213,7 @@ const char *kmstest_plane_name(enum igt_plane plane)
 [IGT_PLANE_1] = plane1,
 [IGT_PLANE_2] = plane2,
 [IGT_PLANE_3] = plane3,
 +   [IGT_PLANE_4] = plane4,
 [IGT_PLANE_CURSOR] = cursor,
 };

 diff --git a/lib/igt_kms.h b/lib/igt_kms.h
 index 09c08aa..14c8b28 100644
 --- a/lib/igt_kms.h
 +++ b/lib/igt_kms.h
 @@ -53,6 +53,7 @@ enum igt_plane {
  IGT_PLANE_PRIMARY = IGT_PLANE_1,
  IGT_PLANE_2,
  IGT_PLANE_3,
 +IGT_PLANE_4,
  IGT_PLANE_CURSOR,
  };

 @@ -205,7 +206,7 @@ struct igt_pipe {
 igt_display_t *display;
 enum pipe pipe;
 bool enabled;
 -#define IGT_MAX_PLANES 4
 +#define IGT_MAX_PLANES 5
 int n_planes;
 igt_plane_t planes[IGT_MAX_PLANES];
 uint64_t background; /* Background color MSB BGR 16bpc LSB */
 diff --git a/tests/kms_plane_scaling.c b/tests/kms_plane_scaling.c
 index 00db5cb..8e3d559 100644
 --- a/tests/kms_plane_scaling.c
 +++ b/tests/kms_plane_scaling.c
 @@ -23,7 +23,7 @@
   */

  #include math.h
 -
 +#include cairo.h

cairo is already included via igt_kms.h and igt_fb.h.


  #include drmtest.h
  #include igt_debugfs.h
  #include igt_kms.h
 @@ -48,9 +48,11 @@ typedef struct {
 struct igt_fb fb1;
 struct igt_fb fb2;
 struct igt_fb fb3;
 +   struct igt_fb fb4;
 int fb_id1;
 int fb_id2;
 int fb_id3;
 +   int fb_id4;

 igt_plane_t *plane1;
 igt_plane_t *plane2;
 @@ -61,6 +63,22 @@ typedef struct {
  #define FILE_NAME   1080p-left.png

  static void
 +paint_plane_ID(data_t *d, struct igt_fb *fb, igt_plane_t *plane)
 +{
 +   cairo_t *cr;
 +
 +   cr = igt_get_cairo_ctx(d->drm_fd, fb);
 +   cairo_move_to(cr, (fb->width/5),
 +                 (fb->height / 5));
 +   cairo_set_font_size(cr, 25);
 +   igt_cairo_printf_line(cr, align_hcenter, 10, "PIPE:PLANE:");
 + 

Re: [Intel-gfx] [PATCH i-g-t v3] Android.mk: Disable tools that do not build for android

2015-07-17 Thread Thomas Wood
On 17 July 2015 at 10:15, Derek Morton derek.j.mor...@intel.com wrote:
 Disable the tools / demo code that do not currently build
 for android until they can be fixed.

 Affected tools / demos
 intel_display_crc
 intel_sprite_on

 v2: intel_display_crc compiled conditionally on ANDROID_HAS_CAIRO
 flag.
 v3: removed intel_reg from the skip list as Thomas has prepared
 a patch to fix it for Android.

 Signed-off-by: Derek Morton derek.j.mor...@intel.com
 ---
  Android.mk   | 2 +-
  tools/Android.mk | 4 
  2 files changed, 5 insertions(+), 1 deletion(-)

 diff --git a/Android.mk b/Android.mk
 index 1ab3e64..681d114 100644
 --- a/Android.mk
 +++ b/Android.mk
 @@ -1,2 +1,2 @@
 -include $(call all-named-subdir-makefiles, lib tests tools benchmarks demos)
 +include $(call all-named-subdir-makefiles, lib tests tools benchmarks)

This essentially just disables building intel_sprite_on. Does the
comment in commit 6999b70 no longer apply?



 diff --git a/tools/Android.mk b/tools/Android.mk
 index 39f4512..b5f8008 100644
 --- a/tools/Android.mk
 +++ b/tools/Android.mk
 @@ -41,6 +41,10 @@ skip_tools_list := \
  intel_vga_read \
  intel_vga_write

 +ifneq (${ANDROID_HAS_CAIRO}, 1)
 +skip_tools_list += intel_display_crc
 +endif
 +endif
 +
  tools_list := $(filter-out $(skip_tools_list),$(bin_PROGRAMS))

  $(foreach item,$(tools_list),$(eval $(call add_tool,$(item
 --
 1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V2] drm/i915/skl+: Add YUV pixel format in Capability list

2015-07-17 Thread Kumar, Mahesh
GEN >= 9 supports the YUV formats for all planes, but they are not exported
in the capability list of the primary plane. Add the YUV formats to the
skl_primary_formats list.
Don't rely on fb->bits_per_pixel, as intel_framebuffer_init does not fill
the bits_per_pixel field of the fb struct for YUV pixel formats. This
leads to a divide-by-zero error during watermark calculation.

V2: Don't break NV12 case.

Signed-off-by: Kumar, Mahesh mahesh1.ku...@intel.com
Cc: Konduru, Chandra chandra.kond...@intel.com
---

 IGT changes made for testcase will be sent in separate patch.

 drivers/gpu/drm/i915/intel_display.c | 4 
 drivers/gpu/drm/i915/intel_pm.c  | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index af0bcfe..d31704a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -72,6 +72,10 @@ static const uint32_t skl_primary_formats[] = {
	DRM_FORMAT_ABGR8888,
DRM_FORMAT_XRGB2101010,
DRM_FORMAT_XBGR2101010,
+   DRM_FORMAT_YUYV,
+   DRM_FORMAT_YVYU,
+   DRM_FORMAT_UYVY,
+   DRM_FORMAT_VYUY,
 };
 
 /* Cursor formats */
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 5eeddc9..5768f8c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3164,7 +3164,8 @@ static void skl_compute_wm_pipe_parameters(struct 
drm_crtc *crtc,
	if (fb) {
		p->plane[0].enabled = true;
		p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
-			drm_format_plane_cpp(fb->pixel_format, 1) : fb->bits_per_pixel / 8;
+			drm_format_plane_cpp(fb->pixel_format, 1) :
+			drm_format_plane_cpp(fb->pixel_format, 0);
		p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
			drm_format_plane_cpp(fb->pixel_format, 0) : 0;
		p->plane[0].tiling = fb->modifier[0];
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3] drm/i915: Use two 32bit reads for select 64bit REG_READ ioctls

2015-07-17 Thread Michał Winiarski
On Thu, Jul 16, 2015 at 12:37:56PM +0100, Chris Wilson wrote:
 Since the hardware sometimes mysteriously totally flummoxes the 64bit
 read of a 64bit register when read using a single instruction, split the
 read into two instructions. Since the read here is of automatically
 incrementing timestamp counters, we also have to be very careful in
 order to make sure that it does not increment between the two
 instructions.
 
 However, since userspace tried to workaround this issue and so enshrined
 this ABI for a broken hardware read and in the process neglected that
 the read only fails in some environments, we have to introduce a new
 uABI flag for userspace to request the 2x32 bit accurate read of the
 timestamp.
 
 v2: Fix alignment check and include details of the workaround for
 userspace.
 
 Reported-by: Karol Herbst freedesk...@karolherbst.de
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91317
 Testcase: igt/gem_reg_read
Tested-by: Michał Winiarski michal.winiar...@intel.com
 Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
 Cc: Michał Winiarski michal.winiar...@intel.com
 Cc: sta...@vger.kernel.org
 ---
  drivers/gpu/drm/i915/intel_uncore.c | 26 +++---
  include/uapi/drm/i915_drm.h |  8 
  2 files changed, 27 insertions(+), 7 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
 b/drivers/gpu/drm/i915/intel_uncore.c
 index 2c477663d378..eb244b57b3fd 100644
 --- a/drivers/gpu/drm/i915/intel_uncore.c
 +++ b/drivers/gpu/drm/i915/intel_uncore.c
 @@ -1310,10 +1310,12 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = dev->dev_private;
   struct drm_i915_reg_read *reg = data;
   struct register_whitelist const *entry = whitelist;
 + unsigned size;
 + u64 offset;
   int i, ret = 0;
  
 	for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
 -		if (entry->offset == reg->offset &&
 +		if (entry->offset == (reg->offset & -entry->size) &&
 		    (1 << INTEL_INFO(dev)->gen & entry->gen_bitmask))
   break;
   }
 @@ -1321,23 +1323,33 @@ int i915_reg_read_ioctl(struct drm_device *dev,
   if (i == ARRAY_SIZE(whitelist))
   return -EINVAL;
  
 + /* We use the low bits to encode extra flags as the register should
 +  * be naturally aligned (and those that are not so aligned merely
 +  * limit the available flags for that register).
 +  */
 +	offset = entry->offset;
 +	size = entry->size;
 +	size |= reg->offset ^ offset;
 +
   intel_runtime_pm_get(dev_priv);
  
 -	switch (entry->size) {
 +	switch (size) {
 +	case 8 | 1:
 +		reg->val = I915_READ64_2x32(offset, offset+4);
 +		break;
 	case 8:
 -		reg->val = I915_READ64(reg->offset);
 +		reg->val = I915_READ64(offset);
 		break;
 	case 4:
 -		reg->val = I915_READ(reg->offset);
 +		reg->val = I915_READ(offset);
 		break;
 	case 2:
 -		reg->val = I915_READ16(reg->offset);
 +		reg->val = I915_READ16(offset);
 		break;
 	case 1:
 -		reg->val = I915_READ8(reg->offset);
 +		reg->val = I915_READ8(offset);
 		break;
 	default:
 -		MISSING_CASE(entry->size);
   ret = -EINVAL;
   goto out;
   }
 diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
 index b0f82ddab987..83f60f01dca2 100644
 --- a/include/uapi/drm/i915_drm.h
 +++ b/include/uapi/drm/i915_drm.h
 @@ -1087,6 +1087,14 @@ struct drm_i915_reg_read {
   __u64 offset;
   __u64 val; /* Return value */
  };
 +/* Known registers:
 + *
 + * Render engine timestamp - 0x2358 + 64bit - gen7+
 + * - Note this register returns an invalid value if using the default
 + *   single instruction 8byte read, in order to workaround that use
  + *   offset (0x2358 | 1) instead.
 + *
 + */
  
  struct drm_i915_reset_stats {
   __u32 ctx_id;
 -- 
 2.1.4
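
For reference, the 2x32 workaround discussed above boils down to the
following pattern (a sketch of the idea behind I915_READ64_2x32, not the
verbatim macro):

    static u64 read64_2x32(struct drm_i915_private *dev_priv,
                           u32 lower_reg, u32 upper_reg)
    {
        u32 upper, lower, old_upper;
        int loop = 0;

        /* Re-read the upper half until it is stable (bounded, in case
         * the register is wedged) so that a carry between the two 32bit
         * reads can never be observed.
         */
        upper = I915_READ(upper_reg);
        do {
            old_upper = upper;
            lower = I915_READ(lower_reg);
            upper = I915_READ(upper_reg);
        } while (upper != old_upper && loop++ < 2);

        return (u64)upper << 32 | lower;
    }

Userspace opts in per the uapi comment by passing offset (0x2358 | 1) to the
REG_READ ioctl.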
 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915/skl: Drop the preliminary_hw_support flag

2015-07-17 Thread Daniel Vetter
On Thu, Jul 16, 2015 at 05:08:09PM +0100, Damien Lespiau wrote:
 Time to light a candle and remove the preliminary_hw_support flag.
 
 Signed-off-by: Damien Lespiau damien.lesp...@intel.com

Both applied to dinq, thanks.
-Daniel

 ---
  drivers/gpu/drm/i915/i915_drv.c | 2 --
  1 file changed, 2 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
 index e44dc0d..c8daa2d 100644
 --- a/drivers/gpu/drm/i915/i915_drv.c
 +++ b/drivers/gpu/drm/i915/i915_drv.c
 @@ -356,7 +356,6 @@ static const struct intel_device_info 
 intel_cherryview_info = {
  };
  
  static const struct intel_device_info intel_skylake_info = {
 - .is_preliminary = 1,
   .is_skylake = 1,
   .gen = 9, .num_pipes = 3,
   .need_gfx_hws = 1, .has_hotplug = 1,
 @@ -369,7 +368,6 @@ static const struct intel_device_info intel_skylake_info 
 = {
  };
  
  static const struct intel_device_info intel_skylake_gt3_info = {
 - .is_preliminary = 1,
   .is_skylake = 1,
   .gen = 9, .num_pipes = 3,
   .need_gfx_hws = 1, .has_hotplug = 1,
 -- 
 2.1.0
 
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH i-g-t v3] Android.mk: Disable tools that do not build for android

2015-07-17 Thread Morton, Derek J


-Original Message-
From: Morton, Derek J 
Sent: Friday, July 17, 2015 9:44 AM
To: intel-gfx@lists.freedesktop.org
Cc: Wood, Thomas; Morton, Derek J
Subject: [PATCH i-g-t v3] Android.mk: Disable tools that do not build for 
android

Disable the tools / demo code that do not currently build for android until 
they can be fixed.

Affected tools / demos
intel_display_crc
intel_sprite_on

v2: intel_display_crc compiled conditionally on ANDROID_HAS_CAIRO flag.
v3: removed intel_reg from the skip list as Thomas has prepared a patch to fix 
it for Android.

Signed-off-by: Derek Morton derek.j.mor...@intel.com
---
 Android.mk   | 2 +-
 tools/Android.mk | 5 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/Android.mk b/Android.mk
index 1ab3e64..681d114 100644
--- a/Android.mk
+++ b/Android.mk
@@ -1,2 +1,2 @@
-include $(call all-named-subdir-makefiles, lib tests tools benchmarks demos)
+include $(call all-named-subdir-makefiles, lib tests tools benchmarks)
 
diff --git a/tools/Android.mk b/tools/Android.mk
index 39f4512..4be0032 100644
--- a/tools/Android.mk
+++ b/tools/Android.mk
@@ -37,10 +37,15 @@ endef
 
 skip_tools_list := \
 intel_framebuffer_dump \
+intel_reg \

That's weird, git diff HEAD^ shows this being removed but git format-patch 
HEAD^ has left it in.

 intel_reg_dumper \
 intel_vga_read \
 intel_vga_write
 
+ifneq (${ANDROID_HAS_CAIRO}, 1)
+skip_tools_list += intel_display_crc
+endif
+
 tools_list := $(filter-out $(skip_tools_list),$(bin_PROGRAMS))
 
 $(foreach item,$(tools_list),$(eval $(call add_tool,$(item
--
1.9.1


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915: Don't reprobe on resume

2015-07-17 Thread Daniel Vetter
On Thu, Jul 16, 2015 at 04:32:44PM +0100, Chris Wilson wrote:
 On Thu, Jul 16, 2015 at 04:47:51PM +0200, Daniel Vetter wrote:
  If we don't force the connector state to unknown there's no reason any
  more to force a reprobe. Also no other driver bothers with this, so
  probably it's not required - userspace handles lid/resume events
  through other channels already.
 
 No, we don't. We don't synthesize any events at all for changing
 connectors whilst suspended and userspace doesn't know about being
 suspended.

One night of sleep does wonders ;-) I agree the patch is crap and my
thinking that it's been broken since ages is also: We start the poll
helper right away and that will take care of all the non-hpd ports. It's
all fine as-is.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH i-g-t v3] Android.mk: Disable tools that do not build for android

2015-07-17 Thread Derek Morton
Disable the tools / demo code that do not currently build
for android until they can be fixed.

Affected tools / demos
intel_display_crc
intel_sprite_on

v2: intel_display_crc compiled conditionally on ANDROID_HAS_CAIRO
flag.
v3: removed intel_reg from the skip list as Thomas has prepared
a patch to fix it for Android.

Signed-off-by: Derek Morton derek.j.mor...@intel.com
---
 Android.mk   | 2 +-
 tools/Android.mk | 4 
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/Android.mk b/Android.mk
index 1ab3e64..681d114 100644
--- a/Android.mk
+++ b/Android.mk
@@ -1,2 +1,2 @@
-include $(call all-named-subdir-makefiles, lib tests tools benchmarks demos)
+include $(call all-named-subdir-makefiles, lib tests tools benchmarks)
 
diff --git a/tools/Android.mk b/tools/Android.mk
index 39f4512..b5f8008 100644
--- a/tools/Android.mk
+++ b/tools/Android.mk
@@ -41,6 +41,10 @@ skip_tools_list := \
 intel_vga_read \
 intel_vga_write
 
+ifneq (${ANDROID_HAS_CAIRO}, 1)
+skip_tools_list += intel_display_crc
+endif
+
 tools_list := $(filter-out $(skip_tools_list),$(bin_PROGRAMS))
 
 $(foreach item,$(tools_list),$(eval $(call add_tool,$(item
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/bxt: WA for swapped HPD pins in A stepping

2015-07-17 Thread Sonika Jindal
As per bspec, on BXT A0/A1, sw needs to activate DDIA HPD logic
and interrupts to check the external panel connection and DDIC HPD
logic for edp panel.

v2: For DP, irq_port is used to determine the encoder instead of
hpd_pin and removing the edp HPD logic because port A HPD is not
present(Imre)

Signed-off-by: Sonika Jindal sonika.jin...@intel.com
---
 drivers/gpu/drm/i915/intel_ddi.c  |   10 +-
 drivers/gpu/drm/i915/intel_hdmi.c |9 -
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index e2c6f73..777e3a3 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -3225,7 +3225,15 @@ void intel_ddi_init(struct drm_device *dev, enum port 
port)
goto err;
 
	intel_dig_port->hpd_pulse = intel_dp_hpd_pulse;
-	dev_priv->hotplug.irq_port[port] = intel_dig_port;
+	/*
+	 * On BXT A0/A1, sw needs to activate DDIA HPD logic and
+	 * interrupts to check the external panel connection.
+	 */
+	if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0) &&
+	    port == PORT_B)
+		dev_priv->hotplug.irq_port[PORT_A] = intel_dig_port;
+	else
+		dev_priv->hotplug.irq_port[port] = intel_dig_port;
}
 
/* In theory we don't need the encoder-type check, but leave it just in
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c 
b/drivers/gpu/drm/i915/intel_hdmi.c
index 70bad5b..94fa716 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1973,7 +1973,14 @@ void intel_hdmi_init_connector(struct intel_digital_port 
*intel_dig_port,
		intel_hdmi->ddc_bus = GMBUS_PIN_1_BXT;
	else
		intel_hdmi->ddc_bus = GMBUS_PIN_DPB;
-	intel_encoder->hpd_pin = HPD_PORT_B;
+	/*
+	 * On BXT A0/A1, sw needs to activate DDIA HPD logic and
+	 * interrupts to check the external panel connection.
+	 */
+	if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0))
+		intel_encoder->hpd_pin = HPD_PORT_A;
+	else
+		intel_encoder->hpd_pin = HPD_PORT_B;
break;
case PORT_C:
if (IS_BROXTON(dev_priv))
-- 
1.7.10.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH i-g-t v3] Android.mk: Disable tools that do not build for android

2015-07-17 Thread Derek Morton
Disable the tools / demo code that do not currently build
for android until they can be fixed.

Affected tools / demos
intel_display_crc
intel_sprite_on

v2: intel_display_crc compiled conditionally on ANDROID_HAS_CAIRO
flag.
v3: removed intel_reg from the skip list as Thomas has prepared
a patch to fix it for Android.

Signed-off-by: Derek Morton derek.j.mor...@intel.com
---
 Android.mk   | 2 +-
 tools/Android.mk | 5 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/Android.mk b/Android.mk
index 1ab3e64..681d114 100644
--- a/Android.mk
+++ b/Android.mk
@@ -1,2 +1,2 @@
-include $(call all-named-subdir-makefiles, lib tests tools benchmarks demos)
+include $(call all-named-subdir-makefiles, lib tests tools benchmarks)
 
diff --git a/tools/Android.mk b/tools/Android.mk
index 39f4512..4be0032 100644
--- a/tools/Android.mk
+++ b/tools/Android.mk
@@ -37,10 +37,15 @@ endef
 
 skip_tools_list := \
 intel_framebuffer_dump \
+intel_reg \
 intel_reg_dumper \
 intel_vga_read \
 intel_vga_write
 
+ifneq (${ANDROID_HAS_CAIRO}, 1)
+skip_tools_list += intel_display_crc
+endif
+
 tools_list := $(filter-out $(skip_tools_list),$(bin_PROGRAMS))
 
 $(foreach item,$(tools_list),$(eval $(call add_tool,$(item
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Fix divide by zero on watermark update

2015-07-17 Thread Daniel Vetter
On Thu, Jul 16, 2015 at 01:43:15PM -0300, Paulo Zanoni wrote:
 2015-07-16 13:36 GMT-03:00 Mika Kuoppala mika.kuopp...@linux.intel.com:
  Fix divide by zero if we end up updating the watermarks
  with zero dotclock.
 
  This is a stop gap measure to allow module load in cases
  where our state keeping fails.
 
  v2: WARN_ON added (Paulo)
 
 Since we're not hiding the problem (due to the WARN_ON) and the patch
 improves the current situation:
 Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com

Queued for -next, thanks for the patch.
-Daniel

 
 
  Cc: Paulo Zanoni przan...@gmail.com
  Cc: Damien Lespiau damien.lesp...@intel.com
  Signed-off-by: Mika Kuoppala mika.kuopp...@intel.com
  ---
   drivers/gpu/drm/i915/intel_pm.c | 4 +++-
   1 file changed, 3 insertions(+), 1 deletion(-)
 
  diff --git a/drivers/gpu/drm/i915/intel_pm.c 
  b/drivers/gpu/drm/i915/intel_pm.c
  index 5eeddc9..0d3e014 100644
  --- a/drivers/gpu/drm/i915/intel_pm.c
  +++ b/drivers/gpu/drm/i915/intel_pm.c
  @@ -3316,8 +3316,10 @@ skl_compute_linetime_wm(struct drm_crtc *crtc, 
  struct skl_pipe_wm_parameters *p)
  if (!to_intel_crtc(crtc)->active)
  return 0;
 
  -   return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
  +   if (WARN_ON(p->pixel_rate == 0))
  +   return 0;
 
  +   return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
   }
 
   static void skl_compute_transition_wm(struct drm_crtc *crtc,
  --
  2.1.4
 
 
 
 
 -- 
 Paulo Zanoni
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] igt/kms_plane_scaling : plane scaling enhancement and plane support for bxt

2015-07-17 Thread Nabendu Maiti
1) Added more overlay plane support for BXT.
2) Added and enhanced scaler test cases for additional planes.

Signed-off-by: Nabendu Maiti nabendu.bikash.ma...@intel.com
---
 lib/igt_fb.c  | 40 
 lib/igt_fb.h  |  1 +
 lib/igt_kms.c |  1 +
 lib/igt_kms.h |  3 +-
 tests/kms_plane_scaling.c | 79 ---
 5 files changed, 118 insertions(+), 6 deletions(-)

diff --git a/lib/igt_fb.c b/lib/igt_fb.c
index 134dbd2..e61b762 100644
--- a/lib/igt_fb.c
+++ b/lib/igt_fb.c
@@ -393,6 +393,46 @@ void igt_paint_image(cairo_t *cr, const char *filename,
 }
 
 /**
+ * igt_paint_cross_ruler:
+ * @cr: cairo drawing context
+ * @dst_width: width of the horizontal ruler
+ * @dst_height: height of the vertical ruler
+ *
+ * This function can be used to draw a cross ruler on a frame buffer.
+ */
+void
+igt_paint_cross_ruler(cairo_t *cr, uint16_t w, uint16_t h)
+{
+
+   uint16_t i;
+
+   /* Paint corner markers */
+   paint_marker(cr, 0, 0);
+   paint_marker(cr, w, 0);
+   paint_marker(cr, 0, h);
+   paint_marker(cr, w, h);
+
+
+   cairo_move_to(cr, w/2, 0);
+   cairo_line_to(cr, w/2, h);
+
+   cairo_set_source_rgb(cr, 4, 1, 10);
+   cairo_move_to(cr, 0, h/2);
+   cairo_line_to(cr, w, h/2 );
+   cairo_stroke(cr);
+
+   cairo_set_source_rgb(cr, 2, 5, 1);
+   cairo_set_line_width(cr, 2);
+   cairo_stroke(cr);
+   cairo_stroke_preserve(cr);
+   for (i = 0; i < w; i += 200)
+   paint_marker(cr, i, h/2);
+
+   for (i = 0; i < h; i += 200)
+   paint_marker(cr, w/2, i);
+}
+
+/**
  * igt_create_fb_with_bo_size:
  * @fd: open i915 drm file descriptor
  * @width: width of the framebuffer in pixel
diff --git a/lib/igt_fb.h b/lib/igt_fb.h
index a07acd2..cf6e7e3 100644
--- a/lib/igt_fb.h
+++ b/lib/igt_fb.h
@@ -98,6 +98,7 @@ void igt_write_fb_to_png(int fd, struct igt_fb *fb, const 
char *filename);
 int igt_cairo_printf_line(cairo_t *cr, enum igt_text_align align,
   double yspacing, const char *fmt, ...)
   __attribute__((format (printf, 4, 5)));
+void igt_paint_cross_ruler(cairo_t *cr, uint16_t w, uint16_t h);
 
 /* helpers to handle drm fourcc codes */
 uint32_t igt_bpp_depth_to_drm_format(int bpp, int depth);
diff --git a/lib/igt_kms.c b/lib/igt_kms.c
index 0bb16b4..781ffa5 100644
--- a/lib/igt_kms.c
+++ b/lib/igt_kms.c
@@ -213,6 +213,7 @@ const char *kmstest_plane_name(enum igt_plane plane)
[IGT_PLANE_1] = plane1,
[IGT_PLANE_2] = plane2,
[IGT_PLANE_3] = plane3,
+   [IGT_PLANE_4] = plane4,
[IGT_PLANE_CURSOR] = cursor,
};
 
diff --git a/lib/igt_kms.h b/lib/igt_kms.h
index 09c08aa..14c8b28 100644
--- a/lib/igt_kms.h
+++ b/lib/igt_kms.h
@@ -53,6 +53,7 @@ enum igt_plane {
 IGT_PLANE_PRIMARY = IGT_PLANE_1,
 IGT_PLANE_2,
 IGT_PLANE_3,
+IGT_PLANE_4,
 IGT_PLANE_CURSOR,
 };
 
@@ -205,7 +206,7 @@ struct igt_pipe {
igt_display_t *display;
enum pipe pipe;
bool enabled;
-#define IGT_MAX_PLANES 4
+#define IGT_MAX_PLANES 5
int n_planes;
igt_plane_t planes[IGT_MAX_PLANES];
uint64_t background; /* Background color MSB BGR 16bpc LSB */
diff --git a/tests/kms_plane_scaling.c b/tests/kms_plane_scaling.c
index 00db5cb..8e3d559 100644
--- a/tests/kms_plane_scaling.c
+++ b/tests/kms_plane_scaling.c
@@ -23,7 +23,7 @@
  */
 
 #include math.h
-
+#include cairo.h
 #include drmtest.h
 #include igt_debugfs.h
 #include igt_kms.h
@@ -48,9 +48,11 @@ typedef struct {
struct igt_fb fb1;
struct igt_fb fb2;
struct igt_fb fb3;
+   struct igt_fb fb4;
int fb_id1;
int fb_id2;
int fb_id3;
+   int fb_id4;
 
igt_plane_t *plane1;
igt_plane_t *plane2;
@@ -61,6 +63,22 @@ typedef struct {
 #define FILE_NAME   1080p-left.png
 
 static void
+paint_plane_ID(data_t *d, struct igt_fb *fb, igt_plane_t *plane)
+{
+   cairo_t *cr;
+
+   cr = igt_get_cairo_ctx(d->drm_fd, fb);
+   cairo_move_to(cr, (fb->width/5),
+                 (fb->height / 5));
+   cairo_set_font_size(cr, 25);
+   igt_cairo_printf_line(cr, align_hcenter, 10, "PIPE:PLANE:");
+   cairo_set_font_size(cr, 30);
+   igt_cairo_printf_line(cr, align_hcenter, 40, " %d:%d",
+                 plane->pipe->pipe, plane->index);
+   cairo_destroy(cr);
+}
+
+static void
 paint_color(data_t *d, struct igt_fb *fb, uint16_t w, uint16_t h)
 {
cairo_t *cr;
@@ -71,12 +89,14 @@ paint_color(data_t *d, struct igt_fb *fb, uint16_t w, 
uint16_t h)
 }
 
 static void
-paint_image(data_t *d, struct igt_fb *fb, uint16_t w, uint16_t h)
+paint_image(const char *filename, data_t *d, struct igt_fb *fb,
+   uint16_t w, uint16_t h)
 {
cairo_t *cr;
 
	cr = igt_get_cairo_ctx(d->drm_fd, fb);
-  

Re: [Intel-gfx] [RFC] drm/i915/gtt: Allow >= 4GB offsets in X86_32

2015-07-17 Thread Tvrtko Ursulin


On 07/16/2015 05:18 PM, Michel Thierry wrote:

Commit c44ef60e4370 ("drm/i915/gtt: Allow >= 4GB sizes for vm.") took care
of most of these changes, but i915_gem_obj_offset still returned an unsigned
long, which is only 4 bytes long on 32-bit kernels.

Change return type (and other related offset variables) to u64.

Since Global GTT is always limited to 4GB, this change is not required
in i915_gem_obj_ggtt_offset.


Although in another patch dealing with GGTT I was asked to use u64
explicitly, so how do we make sure we get some consistency in this area?


Regards,

Tvrtko
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v1 0/4] Add Pooled EU support to BXT

2015-07-17 Thread Arun Siluvery
This patch series adds support for enabling the Pooled EU feature in BXT.

This series has a dependency on Patch1 from Mika, hence it is sent
as part of the series (already reviewed).

Patch2 - adds a framework to extend the golden context batch through
which we can add Gen based commands to enable specific features; in this
case it is used to enable Pooled EU (Patch3)

This is on the idea suggested by Chris Wilson to send two batches instead
of the previous approach of patching the binary data which is unnecessarily
complicated (http://www.spinics.net/lists/intel-gfx/msg71498.html).

Patch4 - option for the userspace to query its availability, illustrated
by the sketch below.
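
For illustration, the userspace query then looks something like this (a
hedged sketch using the standard GETPARAM ioctl via libdrm; header paths
and error handling abbreviated):

    #include <string.h>
    #include <xf86drm.h>
    #include <i915_drm.h>

    /* Returns 1 if the kernel reports Pooled EU support, 0 if not,
     * and -1 if this kernel does not know the parameter at all.
     */
    static int has_pooled_eu(int fd)
    {
        drm_i915_getparam_t gp;
        int value = 0;

        memset(&gp, 0, sizeof(gp));
        gp.param = I915_PARAM_HAS_POOLED_EU;
        gp.value = &value;

        if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
            return -1;

        return value;
    }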


Arun Siluvery (3):
  drm/i915: Add provision to extend Golden context batch
  drm/i915:bxt: Enable Pooled EU support
  drm/i915/bxt: Add get_param to query Pooled EU availability

Mika Kuoppala (1):
  drm/i915: Do kunmap if renderstate parsing fails

 drivers/gpu/drm/i915/i915_dma.c  |  3 ++
 drivers/gpu/drm/i915/i915_drv.c  |  1 +
 drivers/gpu/drm/i915/i915_drv.h  |  5 ++-
 drivers/gpu/drm/i915/i915_gem_render_state.c | 52 ++--
 drivers/gpu/drm/i915/i915_gem_render_state.h |  2 ++
 drivers/gpu/drm/i915/i915_reg.h  |  2 ++
 drivers/gpu/drm/i915/intel_lrc.c |  6 
 include/uapi/drm/i915_drm.h  |  1 +
 8 files changed, 69 insertions(+), 3 deletions(-)

-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v1 4/4] drm/i915/bxt: Add get_param to query Pooled EU availability

2015-07-17 Thread Arun Siluvery
User space clients need to know when the pooled EU feature is present
and enabled on the hardware so that they can adapt work submissions.
Create a new device info flag for this purpose, and create a new GETPARAM
entry to allow user space to query its setting.

Set has_pooled_eu to true in the Broxton static device info - Broxton
supports the feature in hardware and the driver will enable it by
default.

Signed-off-by: Jeff McGee jeff.mc...@intel.com
Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_dma.c | 3 +++
 drivers/gpu/drm/i915/i915_drv.c | 1 +
 drivers/gpu/drm/i915/i915_drv.h | 5 -
 include/uapi/drm/i915_drm.h | 1 +
 4 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 5e63076..6c31beb 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_RESOURCE_STREAMER:
value = HAS_RESOURCE_STREAMER(dev);
break;
+   case I915_PARAM_HAS_POOLED_EU:
+   value = HAS_POOLED_EU(dev);
+   break;
default:
		DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e44dc0d..213f74d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -389,6 +389,7 @@ static const struct intel_device_info intel_broxton_info = {
.num_pipes = 3,
.has_ddi = 1,
.has_fbc = 1,
+   .has_pooled_eu = 1,
GEN_DEFAULT_PIPEOFFSETS,
IVB_CURSOR_OFFSETS,
 };
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 768d1db..32850a8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -775,7 +775,8 @@ struct intel_csr {
func(supports_tv) sep \
func(has_llc) sep \
func(has_ddi) sep \
-   func(has_fpga_dbg)
+   func(has_fpga_dbg) sep \
+   func(has_pooled_eu)
 
 #define DEFINE_FLAG(name) u8 name:1
 #define SEP_SEMICOLON ;
@@ -2549,6 +2550,8 @@ struct drm_i915_cmd_table {
 #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \
				    INTEL_INFO(dev)->gen >= 8)
 
+#define HAS_POOLED_EU(dev) (INTEL_INFO(dev)->has_pooled_eu)
+
 #define INTEL_PCH_DEVICE_ID_MASK   0xff00
 #define INTEL_PCH_IBX_DEVICE_ID_TYPE   0x3b00
 #define INTEL_PCH_CPT_DEVICE_ID_TYPE   0x1c00
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e7c29f1..9649577 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_EU_TOTAL 34
 #define I915_PARAM_HAS_GPU_RESET35
 #define I915_PARAM_HAS_RESOURCE_STREAMER 36
+#define I915_PARAM_HAS_POOLED_EU 37
 
 typedef struct drm_i915_getparam {
int param;
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v1 1/4] drm/i915: Do kunmap if renderstate parsing fails

2015-07-17 Thread Arun Siluvery
From: Mika Kuoppala mika.kuopp...@linux.intel.com

Kunmap the renderstate page on error path.

Reviewed-by: Arun Siluvery arun.siluv...@linux.intel.com
Signed-off-by: Mika Kuoppala mika.kuopp...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_render_state.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index a0201fc..b6492fe 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -96,8 +96,10 @@ static int render_state_setup(struct render_state *so)
s = lower_32_bits(r);
		if (so->gen >= 8) {
			if (i + 1 >= rodata->batch_items ||
-			    rodata->batch[i + 1] != 0)
-				return -EINVAL;
+			    rodata->batch[i + 1] != 0) {
+   ret = -EINVAL;
+   goto err_out;
+   }
 
d[i++] = s;
s = upper_32_bits(r);
@@ -120,6 +122,10 @@ static int render_state_setup(struct render_state *so)
}
 
return 0;
+
+err_out:
+   kunmap(page);
+   return ret;
 }
 
 void i915_gem_render_state_fini(struct render_state *so)
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH i-g-t v3] Android.mk: Disable tools that do not build for android

2015-07-17 Thread Morton, Derek J


-Original Message-
From: Thomas Wood [mailto:thomas.w...@intel.com] 
Sent: Friday, July 17, 2015 3:18 PM
To: Morton, Derek J
Cc: Intel Graphics Development
Subject: Re: [PATCH i-g-t v3] Android.mk: Disable tools that do not build for 
android

On 17 July 2015 at 10:15, Derek Morton derek.j.mor...@intel.com wrote:
 Disable the tools / demo code that do not currently build for android 
 until they can be fixed.

 Affected tools / demos
 intel_display_crc
 intel_sprite_on

 v2: intel_display_crc compiled conditionally on ANDROID_HAS_CAIRO 
 flag.
 v3: removed intel_reg from the skip list as Thomas has prepared a 
 patch to fix it for Android.

 Signed-off-by: Derek Morton derek.j.mor...@intel.com
 ---
  Android.mk   | 2 +-
  tools/Android.mk | 4 
  2 files changed, 5 insertions(+), 1 deletion(-)

 diff --git a/Android.mk b/Android.mk
 index 1ab3e64..681d114 100644
 --- a/Android.mk
 +++ b/Android.mk
 @@ -1,2 +1,2 @@
 -include $(call all-named-subdir-makefiles, lib tests tools benchmarks demos)
 +include $(call all-named-subdir-makefiles, lib tests tools benchmarks)

This essentially just disables building intel_sprite_on. Does the comment in 
commit 6999b70 no longer apply?

With this patch the IGT automatic build test would not need to apply the patch 
referred to in commit 6999b70. That local patch would still be required by 
anyone wishing to build intel_sprite_on for android.

This patch is aimed at reducing friction by making the vanilla IGT code build 
for android as the local patch referred to is not considered upstreamable.

//Derek



 diff --git a/tools/Android.mk b/tools/Android.mk
 index 39f4512..b5f8008 100644
 --- a/tools/Android.mk
 +++ b/tools/Android.mk
 @@ -41,6 +41,10 @@ skip_tools_list := \
  intel_vga_read \
  intel_vga_write

 +ifneq (${ANDROID_HAS_CAIRO}, 1)
 +skip_tools_list += intel_display_crc
 +endif
 +
  tools_list := $(filter-out $(skip_tools_list),$(bin_PROGRAMS))

  $(foreach item,$(tools_list),$(eval $(call add_tool,$(item
 --
 1.9.1


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v1 3/4] drm/i915:bxt: Enable Pooled EU support

2015-07-17 Thread Arun Siluvery
This mode allows EUs to be assigned to pools.
The command to enable this mode is sent in auxiliary golden context batch
as this is only issued once with each context initialization. Thanks to
Mika for the preliminary review.

Cc: Mika Kuoppala mika.kuopp...@intel.com
Cc: Chris Wilson ch...@chris-wilson.co.uk
Cc: Armin Reese armin.c.re...@intel.com
Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_gem_render_state.c | 15 +++
 drivers/gpu/drm/i915/i915_reg.h  |  2 ++
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index b86e382..a41a1b6 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -84,6 +84,7 @@ free_gem:
 
 static int render_state_setup(struct render_state *so)
 {
+   struct drm_device *dev = so->obj->base.dev;
	const struct intel_renderstate_rodata *rodata = so->rodata;
unsigned int i = 0, reloc_index = 0;
struct page *page;
@@ -125,6 +126,20 @@ static int render_state_setup(struct render_state *so)
 
	so->aux_batch_offset = i * sizeof(u32);
 
+   if (IS_BROXTON(dev)) {
+   u32 pool_config = 0;
+   struct drm_i915_private *dev_priv = to_i915(dev);
+
+   OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE);
+   OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE);
+   if (dev_priv->info.subslice_total == 3)
+   pool_config = 0x00777000;
+   OUT_BATCH(d, i, pool_config);
+   OUT_BATCH(d, i, 0);
+   OUT_BATCH(d, i, 0);
+   OUT_BATCH(d, i, 0);
+   }
+
OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 9a2ffad..e052499 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -379,6 +379,8 @@
  */
 #define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags))
 
+#define GEN9_MEDIA_POOL_STATE ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4)
+#define   GEN9_MEDIA_POOL_ENABLE  (1 << 31)
 #define GFX_OP_RASTER_RULES((0x3<<29)|(0x7<<24))
 #define GFX_OP_SCISSOR ((0x3<<29)|(0x1c<<24)|(0x10<<19))
 #define   SC_UPDATE_SCISSOR   (0x1<<1)
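
For reference, the new opcode decodes as follows (a sketch of the usual GFX
command layout; the field breakdown is an assumption, not part of the patch):

/* GEN9_MEDIA_POOL_STATE, assuming the standard media command encoding:
 *   bits 31:29 = 0x3  command type (GFX pipeline instruction)
 *   bits 28:27 = 0x2  pipeline (media)
 *   bits 23:16 = 0x5  subopcode
 *   bits  7:0  = 4    DWord length minus 2, i.e. a 6-DWord command,
 *                     matching the six OUT_BATCH emits above
 */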
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v1 2/4] drm/i915: Add provision to extend Golden context batch

2015-07-17 Thread Arun Siluvery
The Golden batch carries 3D state at the beginning so that HW starts with
a known state. It is carried as a binary blob which is auto-generated from
source. The idea was that it would be easier to maintain and keep the
complexity out of the kernel, which makes sense as we don't really touch it.
However, if you really need to update it then you need to update the
generator source and keep the binary blob in sync with it.

There is a need to patch this in bxt to send one additional command to enable
a feature. A solution was to patch the binary data with some additional
data structures (included as part of auto-generator source) but it was
unnecessarily complicated.

Chris suggested the idea of having a secondary batch and executing two batch
buffers. It has clear advantages: we needn't touch the base golden batch,
can customize the secondary/auxiliary batch depending on Gen, and it can be
carried in the driver with no dependencies.

This patch adds support for this auxiliary batch, which is inserted at the
end of the golden batch and is completely independent from it. Thanks to Mika
for the preliminary review.

Cc: Mika Kuoppala mika.kuopp...@intel.com
Cc: Chris Wilson ch...@chris-wilson.co.uk
Cc: Armin Reese armin.c.re...@intel.com
Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_gem_render_state.c | 27 +++
 drivers/gpu/drm/i915/i915_gem_render_state.h |  2 ++
 drivers/gpu/drm/i915/intel_lrc.c |  6 ++
 3 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index b6492fe..b86e382 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -73,6 +73,15 @@ free_gem:
return ret;
 }
 
+#define OUT_BATCH(batch, i, val)   \
+   do {\
+   if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {  \
+   ret = -ENOSPC;  \
+   goto err_out;   \
+   }   \
+   (batch)[(i)++] = (val); \
+   } while(0)
+
 static int render_state_setup(struct render_state *so)
 {
const struct intel_renderstate_rodata *rodata = so->rodata;
@@ -110,6 +119,15 @@ static int render_state_setup(struct render_state *so)
 
d[i++] = s;
}
+
+   while (i % CACHELINE_DWORDS)
+   OUT_BATCH(d, i, MI_NOOP);
+
+   so->aux_batch_offset = i * sizeof(u32);
+
+   OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
+   so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
+
kunmap(page);
 
ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
@@ -128,6 +146,8 @@ err_out:
return ret;
 }
 
+#undef OUT_BATCH
+
 void i915_gem_render_state_fini(struct render_state *so)
 {
i915_gem_object_ggtt_unpin(so->obj);
@@ -176,6 +196,13 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
if (ret)
goto out;
 
+   ret = req->ring->dispatch_execbuffer(req,
+(so.ggtt_offset + so.aux_batch_offset),
+so.aux_batch_size,
+I915_DISPATCH_SECURE);
+   if (ret)
+   goto out;
+
i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
 
 out:
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h 
b/drivers/gpu/drm/i915/i915_gem_render_state.h
index 7aa7372..79de101 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.h
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
@@ -37,6 +37,8 @@ struct render_state {
struct drm_i915_gem_object *obj;
u64 ggtt_offset;
int gen;
+   u32 aux_batch_size;
+   u64 aux_batch_offset;
 };
 
 int i915_gem_render_state_init(struct drm_i915_gem_request *req);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index adb386d..5e4771e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1661,6 +1661,12 @@ static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
if (ret)
goto out;
 
+   ret = req->ring->emit_bb_start(req,
+  (so.ggtt_offset + so.aux_batch_offset),
+  I915_DISPATCH_SECURE);
+   if (ret)
+   goto out;
+
i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
 
 out:
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v1 2/4] drm/i915: Add provision to extend Golden context batch

2015-07-17 Thread Chris Wilson
On Fri, Jul 17, 2015 at 05:08:52PM +0100, Arun Siluvery wrote:
 The Golden batch carries 3D state at the beginning so that HW starts with
 a known state. It is carried as a binary blob which is auto-generated from
 source. The idea was it would be easier to maintain and keep the complexity
 out of the kernel which makes sense as we don't really touch it. However if
 you really need to update it then you need to update generator source and
 keep the binary blob in sync with it.
 
 There is a need to patch this in bxt to send one additional command to enable
 a feature. A solution was to patch the binary data with some additional
 data structures (included as part of auto-generator source) but it was
 unnecessarily complicated.
 
 Chris suggested the idea of having a secondary batch and execute two batch
 buffers. It has clear advantages as we needn't touch the base golden batch,
 can customize secondary/auxiliary batch depending on Gen and can be carried
 in the driver with no dependencies.
 
 This patch adds support for this auxiliary batch which is inserted at the
 end of golden batch and is completely independent from it. Thanks to Mika
 for the preliminary review.
 
 Cc: Mika Kuoppala mika.kuopp...@intel.com
 Cc: Chris Wilson ch...@chris-wilson.co.uk
 Cc: Armin Reese armin.c.re...@intel.com
 Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com
 ---
  drivers/gpu/drm/i915/i915_gem_render_state.c | 27 +++
  drivers/gpu/drm/i915/i915_gem_render_state.h |  2 ++
  drivers/gpu/drm/i915/intel_lrc.c |  6 ++
  3 files changed, 35 insertions(+)
 
 diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
 b/drivers/gpu/drm/i915/i915_gem_render_state.c
 index b6492fe..b86e382 100644
 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
 +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
 @@ -73,6 +73,15 @@ free_gem:
   return ret;
  }
  
 +#define OUT_BATCH(batch, i, val) \
 + do {\
 + if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {  \

We have to be slightly more careful here, as we don't have the full page
available since we put the render state into the high arena of the golden
bb. Something like WARN_ON(i >= PAGE_SIZE/sizeof(u32) || (batch)[i]) should
suffice.
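
For illustration, the macro with that extra guard might read as follows (a
sketch only; PAGE_SIZE, ret and the err_out label are the ones from the patch
above; MI_NOOP is 0, so the alignment padding still passes the non-zero check):

#define OUT_BATCH(batch, i, val) \
	do { \
		if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32) || (batch)[(i)])) { \
			ret = -ENOSPC; \
			goto err_out; \
		} \
		(batch)[(i)++] = (val); \
	} while (0)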

 @@ -110,6 +119,15 @@ static int render_state_setup(struct render_state *so)
  
   d[i++] = s;
   }
 +
 + while (i % CACHELINE_DWORDS)
 + OUT_BATCH(d, i, MI_NOOP);
 +
 + so->aux_batch_offset = i * sizeof(u32);
 +
 + OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
 + so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;

Strictly, if we are passing the batch length and want to be strictly
conformant, then the aux_batch_size must be a multiple of 8.
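
In code, that amounts to padding the computed size (a sketch; ALIGN is the
usual kernel helper, and this is what a later revision of the patch does):

	/* Pad the aux batch length to the 8-byte multiple required when an
	 * explicit batch length is passed; Gen2 is the strictest here. */
	so->aux_batch_size = ALIGN(so->aux_batch_size, 8);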

 +
   kunmap(page);
  
   ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
 @@ -128,6 +146,8 @@ err_out:
   return ret;
  }
  
 +#undef OUT_BATCH
 +
  void i915_gem_render_state_fini(struct render_state *so)
  {
   i915_gem_object_ggtt_unpin(so->obj);
 @@ -176,6 +196,13 @@ int i915_gem_render_state_init(struct 
 drm_i915_gem_request *req)
   if (ret)
   goto out;
  
Then we need only execute this BB if so.aux_batch_size > 8

 + ret = req->ring->dispatch_execbuffer(req,
 +  (so.ggtt_offset + so.aux_batch_offset),
 +  so.aux_batch_size,
 +  I915_DISPATCH_SECURE);
 + if (ret)
 + goto out;
 +
   i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v1 3/4] drm/i915:bxt: Enable Pooled EU support

2015-07-17 Thread Chris Wilson
On Fri, Jul 17, 2015 at 05:08:53PM +0100, Arun Siluvery wrote:
 This mode allows to assign EUs to pools.
 The command to enable this mode is sent in auxiliary golden context batch
 as this is only issued once with each context initialization. Thanks to
 Mika for the preliminary review.

A quick explanation for why this has to be in the kernel would be nice.
Privileged instruction?

Not fond of the split between this and patch 4. Patch 4 introduces one
feature flag that looks different to the one we use here to enable
support.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH i-g-t 1/3] lib: Move gpgpu_fill code to separate file

2015-07-17 Thread Dominik Zeromski
The gpgpu fill utility functions are used in a separate test so it's
logical to keep them in a separate file. This is similar to what the
media spin test did in the past.

Functionally, only the gpgpu kernel changed: the send instruction payload
size was reduced. Since the offset is incremented by 0x10 bytes, there is
no point in using larger writes.

Cc: Thomas Wood thomas.w...@intel.com
Signed-off-by: Dominik Zeromski dominik.zerom...@intel.com
---
 lib/Makefile.sources |   4 +-
 lib/gpgpu_fill.c | 422 +++
 lib/gpgpu_fill.h |  37 
 lib/intel_batchbuffer.c  |   1 +
 lib/media_fill.h |   7 -
 lib/media_fill_gen7.c| 151 
 shaders/gpgpu/gpgpu_fill.gxa |  14 +-
 7 files changed, 464 insertions(+), 172 deletions(-)
 create mode 100755 lib/gpgpu_fill.c
 create mode 100644 lib/gpgpu_fill.h

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 7f88b65..695f609 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -32,7 +32,9 @@ libintel_tools_la_SOURCES =   \
media_fill_gen8lp.c \
media_fill_gen9.c   \
media_spin.h\
-   media_spin.c\
+   media_spin.c\
+   gpgpu_fill.h\
+   gpgpu_fill.c\
gen7_media.h\
gen8_media.h\
rendercopy_i915.c   \
diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
new file mode 100755
index 0000000..f0911e6
--- /dev/null
+++ b/lib/gpgpu_fill.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *  Zhenyu Wang zhen...@linux.intel.com
+ *  Dominik Zeromski dominik.zerom...@intel.com
+ */
+
+#include <intel_bufmgr.h>
+#include <i915_drm.h>
+
+#include "intel_reg.h"
+#include "drmtest.h"
+#include "intel_batchbuffer.h"
+#include "gen7_media.h"
+#include "gpgpu_fill.h"
+
+/* shaders/gpgpu/gpgpu_fill.gxa */
+static const uint32_t gen7_gpgpu_kernel[][4] = {
+   { 0x0041, 0x20200231, 0x0020, 0x },
+   { 0x0041, 0x20400c21, 0x0004, 0x0010 },
+   { 0x0001, 0x20440021, 0x0018, 0x },
+   { 0x0061, 0x20800021, 0x008d, 0x },
+   { 0x0021, 0x20800021, 0x00450040, 0x },
+   { 0x0001, 0x20880061, 0x, 0x000f },
+   { 0x0081, 0x20a00021, 0x0020, 0x },
+   { 0x05800031, 0x24001ca8, 0x0080, 0x060a8000 },
+   { 0x0061, 0x2e21, 0x008d, 0x },
+   { 0x07800031, 0x20001ca8, 0x0e00, 0x8210 },
+};
+
+static uint32_t
+batch_used(struct intel_batchbuffer *batch)
+{
+   return batch->ptr - batch->buffer;
+}
+
+static uint32_t
+batch_align(struct intel_batchbuffer *batch, uint32_t align)
+{
+   uint32_t offset = batch_used(batch);
+   offset = ALIGN(offset, align);
+   batch->ptr = batch->buffer + offset;
+   return offset;
+}
+
+static void *
+batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
+{
+   uint32_t offset = batch_align(batch, align);
+   batch->ptr += size;
+   return memset(batch->buffer + offset, 0, size);
+}
+
+static uint32_t
+batch_offset(struct intel_batchbuffer *batch, void *ptr)
+{
+   return (uint8_t *)ptr - batch->buffer;
+}
+
+static uint32_t
+batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size,
+  uint32_t align)
+{
+   return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
+}
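
As a usage sketch (a hypothetical call site; gen7_gpgpu_kernel is the array
defined above), these helpers place a blob at an aligned offset inside the
batch and return where it landed:

	/* Copy the kernel into the batch at a 64-byte aligned offset. */
	uint32_t kernel_offset = batch_copy(batch, gen7_gpgpu_kernel,
					    sizeof(gen7_gpgpu_kernel), 64);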
+
+static void
+gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
+{
+   int ret;
+
+   ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
+   if (ret == 0)
+   ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
+   NULL, 0, 0, 0);
+   igt_assert(ret == 0);
+}
+
+static 

[Intel-gfx] [PATCH i-g-t 0/3] gpgpu_fill test new hardware support

2015-07-17 Thread Dominik Zeromski
These patches rearrange the gpgpu fill library functions and add support
for BDW and SKL.

Dominik Zeromski (3):
  lib: Move gpgpu_fill code to separate file
  lib/gpgpu_fill: Add BDW support
  lib/gpgpu_fill: Add SKL support

 lib/Makefile.sources |   4 +-
 lib/gpgpu_fill.c | 808 +++
 lib/gpgpu_fill.h |  51 +++
 lib/intel_batchbuffer.c  |   5 +
 lib/media_fill.h |   7 -
 lib/media_fill_gen7.c| 151 
 shaders/gpgpu/gpgpu_fill.gxa |  14 +-
 7 files changed, 868 insertions(+), 172 deletions(-)
 create mode 100755 lib/gpgpu_fill.c
 create mode 100644 lib/gpgpu_fill.h

-- 
1.8.3.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH i-g-t 2/3] lib/gpgpu_fill: Add BDW support

2015-07-17 Thread Dominik Zeromski
BDW changed the structure of the surface state and interface descriptors.
Commands like state base address and gpgpu walker were extended.

Cc: Thomas Wood thomas.w...@intel.com
Signed-off-by: Dominik Zeromski dominik.zerom...@intel.com
---
 lib/gpgpu_fill.c| 296 +++-
 lib/gpgpu_fill.h|   7 ++
 lib/intel_batchbuffer.c |   2 +
 3 files changed, 299 insertions(+), 6 deletions(-)

diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index f0911e6..c98f121 100755
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -32,6 +32,7 @@
 #include drmtest.h
 #include intel_batchbuffer.h
 #include gen7_media.h
+#include gen8_media.h
 #include gpgpu_fill.h
 
 /* shaders/gpgpu/gpgpu_fill.gxa */
@@ -48,6 +49,19 @@ static const uint32_t gen7_gpgpu_kernel[][4] = {
{ 0x07800031, 0x20001ca8, 0x0e00, 0x8210 },
 };
 
+static const uint32_t gen8_gpgpu_kernel[][4] = {
+   { 0x0041, 0x20202288, 0x0020, 0x },
+   { 0x0041, 0x20400208, 0x0604, 0x0010 },
+   { 0x0001, 0x20440208, 0x0018, 0x },
+   { 0x0061, 0x20800208, 0x008d, 0x },
+   { 0x0021, 0x20800208, 0x00450040, 0x },
+   { 0x0001, 0x20880608, 0x, 0x000f },
+   { 0x0081, 0x20a00208, 0x0020, 0x },
+   { 0x0c800031, 0x24000a40, 0x0e80, 0x060a8000 },
+   { 0x0061, 0x2e000208, 0x008d, 0x },
+   { 0x07800031, 0x2a40, 0x0e000e00, 0x8210 },
+};
+
 static uint32_t
 batch_used(struct intel_batchbuffer *batch)
 {
@@ -97,8 +111,7 @@ gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
 }
 
 static uint32_t
-gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-   uint8_t color)
+gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch, uint8_t color)
 {
uint8_t *curbe_buffer;
uint32_t offset;
@@ -160,6 +173,58 @@ gen7_fill_surface_state(struct intel_batchbuffer *batch,
 }
 
 static uint32_t
+gen8_fill_surface_state(struct intel_batchbuffer *batch,
+   struct igt_buf *buf,
+   uint32_t format,
+   int is_dst)
+{
+   struct gen8_surface_state *ss;
+   uint32_t write_domain, read_domain, offset;
+   int ret;
+
+   if (is_dst) {
+   write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
+   } else {
+   write_domain = 0;
+   read_domain = I915_GEM_DOMAIN_SAMPLER;
+   }
+
+   ss = batch_alloc(batch, sizeof(*ss), 64);
+   offset = batch_offset(batch, ss);
+
+   ss->ss0.surface_type = GEN8_SURFACE_2D;
+   ss->ss0.surface_format = format;
+   ss->ss0.render_cache_read_write = 1;
+   ss->ss0.vertical_alignment = 1; /* align 4 */
+   ss->ss0.horizontal_alignment = 1; /* align 4 */
+
+   if (buf->tiling == I915_TILING_X)
+   ss->ss0.tiled_mode = 2;
+   else if (buf->tiling == I915_TILING_Y)
+   ss->ss0.tiled_mode = 3;
+
+   ss->ss8.base_addr = buf->bo->offset;
+
+   ret = drm_intel_bo_emit_reloc(batch->bo,
+   batch_offset(batch, ss) + 8 * 4,
+   buf->bo, 0,
+   read_domain, write_domain);
+   igt_assert_eq(ret, 0);
+
+   ss->ss2.height = igt_buf_height(buf) - 1;
+   ss->ss2.width  = igt_buf_width(buf) - 1;
+   ss->ss3.pitch  = buf->stride - 1;
+
+   ss->ss7.shader_chanel_select_r = 4;
+   ss->ss7.shader_chanel_select_g = 5;
+   ss->ss7.shader_chanel_select_b = 6;
+   ss->ss7.shader_chanel_select_a = 7;
+   return offset;
+
+}
+
+static uint32_t
 gen7_fill_binding_table(struct intel_batchbuffer *batch,
struct igt_buf *dst)
 {
@@ -174,6 +239,20 @@ gen7_fill_binding_table(struct intel_batchbuffer *batch,
 }
 
 static uint32_t
+gen8_fill_binding_table(struct intel_batchbuffer *batch,
+   struct igt_buf *dst)
+{
+   uint32_t *binding_table, offset;
+
+   binding_table = batch_alloc(batch, 32, 64);
+   offset = batch_offset(batch, binding_table);
+
+   binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
+
+   return offset;
+}
+
+static uint32_t
 gen7_fill_gpgpu_kernel(struct intel_batchbuffer *batch,
const uint32_t kernel[][4],
size_t size)
@@ -216,6 +295,37 @@ gen7_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
return offset;
 }
 
+static uint32_t
+gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,
+  const uint32_t kernel[][4], size_t size)
+{
+   struct gen8_interface_descriptor_data *idd;
+   uint32_t offset;
+   uint32_t binding_table_offset, kernel_offset;
+
+   binding_table_offset = gen8_fill_binding_table(batch, dst);
+   kernel_offset = gen7_fill_gpgpu_kernel(batch, 

[Intel-gfx] [PATCH i-g-t 3/3] lib/gpgpu_fill: Add SKL support

2015-07-17 Thread Dominik Zeromski
SKL changed state base address command.

Cc: Thomas Wood thomas.w...@intel.com
Signed-off-by: Dominik Zeromski dominik.zerom...@intel.com
---
 lib/gpgpu_fill.c| 102 
 lib/gpgpu_fill.h|   7 
 lib/intel_batchbuffer.c |   2 +
 3 files changed, 111 insertions(+)

diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index c98f121..4d98643 100755
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -62,6 +62,19 @@ static const uint32_t gen8_gpgpu_kernel[][4] = {
{ 0x07800031, 0x2a40, 0x0e000e00, 0x8210 },
 };
 
+static const uint32_t gen9_gpgpu_kernel[][4] = {
+   { 0x0041, 0x20202288, 0x0020, 0x },
+   { 0x0041, 0x20400208, 0x0604, 0x0010 },
+   { 0x0001, 0x20440208, 0x0018, 0x },
+   { 0x0061, 0x20800208, 0x008d, 0x },
+   { 0x0021, 0x20800208, 0x00450040, 0x },
+   { 0x0001, 0x20880608, 0x, 0x000f },
+   { 0x0081, 0x20a00208, 0x0020, 0x },
+   { 0x0c800031, 0x24000a40, 0x0680, 0x060a8000 },
+   { 0x0061, 0x2e000208, 0x008d, 0x },
+   { 0x07800031, 0x2a40, 0x06000e00, 0x8210 },
+};
+
 static uint32_t
 batch_used(struct intel_batchbuffer *batch)
 {
@@ -390,6 +403,47 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch)
 }
 
 static void
+gen9_emit_state_base_address(struct intel_batchbuffer *batch)
+{
+   OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
+
+   /* general */
+   OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+   OUT_BATCH(0);
+
+   /* stateless data port */
+   OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+
+   /* surface */
+   OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
+
+   /* dynamic */
+   OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
+   0, BASE_ADDRESS_MODIFY);
+
+   /* indirect */
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+
+   /* instruction */
+   OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+   /* general state buffer size */
+   OUT_BATCH(0xfffff000 | 1);
+   /* dynamic state buffer size */
+   OUT_BATCH(1 << 12 | 1);
+   /* indirect object buffer size */
+   OUT_BATCH(0xfffff000 | 1);
+   /* instruction buffer size; the modify enable bit must be set,
+* otherwise it may result in a GPU hang */
+   OUT_BATCH(1 << 12 | 1);
+
+   /* Bindless surface state base address */
+   OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+   OUT_BATCH(0);
+   OUT_BATCH(0xfffff000);
+}
+
+static void
 gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
 {
OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
@@ -704,3 +758,51 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
gen7_render_flush(batch, batch_end);
intel_batchbuffer_reset(batch);
 }
+
+void
+gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
+   struct igt_buf *dst,
+   unsigned x, unsigned y,
+   unsigned width, unsigned height,
+   uint8_t color)
+{
+   uint32_t curbe_buffer, interface_descriptor;
+   uint32_t batch_end;
+
+   intel_batchbuffer_flush(batch);
+
+   /* setup states */
+   batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
+
+   /*
+* The const buffer needs to be filled for every thread, but as we
+* have just one thread per group, only one curbe data entry is needed.
+*
+* For each thread, just use the thread group ID for the buffer offset.
+*/
+   curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
+
+   interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
+ gen9_gpgpu_kernel,
+ sizeof(gen9_gpgpu_kernel));
+   igt_assert(batch->ptr < &batch->buffer[4095]);
+
+   batch->ptr = batch->buffer;
+
+   /* GPGPU pipeline */
+   OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
+
+   gen9_emit_state_base_address(batch);
+   gen8_emit_vfe_state_gpgpu(batch);
+   gen7_emit_curbe_load(batch, curbe_buffer);
+   gen7_emit_interface_descriptor_load(batch, interface_descriptor);
+   gen8_emit_gpgpu_walk(batch, x, y, width, height);
+
+   OUT_BATCH(MI_BATCH_BUFFER_END);
+
+   batch_end = batch_align(batch, 8);
+   igt_assert(batch_end < BATCH_STATE_SPLIT);
+
+   gen7_render_flush(batch, batch_end);
+   intel_batchbuffer_reset(batch);
+}
diff --git a/lib/gpgpu_fill.h b/lib/gpgpu_fill.h
index 2d14881..7b5c832 100644
--- a/lib/gpgpu_fill.h
+++ b/lib/gpgpu_fill.h
@@ -41,4 +41,11 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
unsigned width, unsigned height,
uint8_t color);
 
+void
+gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
+   struct 

Re: [Intel-gfx] [PATCH v1 2/4] drm/i915: Add provision to extend Golden context batch

2015-07-17 Thread Chris Wilson
On Fri, Jul 17, 2015 at 07:37:45PM +0300, Mika Kuoppala wrote:
 Chris Wilson ch...@chris-wilson.co.uk writes:
 
  On Fri, Jul 17, 2015 at 05:08:52PM +0100, Arun Siluvery wrote:
  The Golden batch carries 3D state at the beginning so that HW starts with
  a known state. It is carried as a binary blob which is auto-generated from
  source. The idea was it would be easier to maintain and keep the complexity
  out of the kernel which makes sense as we don't really touch it. However if
  you really need to update it then you need to update generator source and
  keep the binary blob in sync with it.
  
  There is a need to patch this in bxt to send one additional command to 
  enable
  a feature. A solution was to patch the binary data with some additional
  data structures (included as part of auto-generator source) but it was
  unnecessarily complicated.
  
  Chris suggested the idea of having a secondary batch and execute two batch
  buffers. It has clear advantages as we needn't touch the base golden batch,
  can customize secondary/auxiliary batch depending on Gen and can be carried
  in the driver with no dependencies.
  
  This patch adds support for this auxiliary batch which is inserted at the
  end of golden batch and is completely independent from it. Thanks to Mika
  for the preliminary review.
  
  Cc: Mika Kuoppala mika.kuopp...@intel.com
  Cc: Chris Wilson ch...@chris-wilson.co.uk
  Cc: Armin Reese armin.c.re...@intel.com
  Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com
  ---
   drivers/gpu/drm/i915/i915_gem_render_state.c | 27 
  +++
   drivers/gpu/drm/i915/i915_gem_render_state.h |  2 ++
   drivers/gpu/drm/i915/intel_lrc.c |  6 ++
   3 files changed, 35 insertions(+)
  
  diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
  b/drivers/gpu/drm/i915/i915_gem_render_state.c
  index b6492fe..b86e382 100644
  --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
  +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
  @@ -73,6 +73,15 @@ free_gem:
 return ret;
   }
   
  +#define OUT_BATCH(batch, i, val)  \
  +  do {\
 +  if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {  \
 
  We have to be slightly more careful here, as we don't have the full page
  available since we put render state into the high arena of the golden
 bb. Something like WARN_ON(i >= PAGE_SIZE/sizeof(u32) || (batch)[i]) should
  suffice.
 
 
 Null state gen makes the final batch in two passes. First
 it builds the commands and state separately. And when the size of both
 is known, it compacts by relocating the state right after
 the commands (+ some alignment).
 
 So we should have the rest of the page usable for auxiliary
 commands here as we have already copied the state part
 also.

Ta. Maybe add some words of enlightenment here for future me as well?
Also we will need to document that the kernel then relies on the packing
to add extra commands after the batch to the null state generator.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v1 3/4] drm/i915:bxt: Enable Pooled EU support

2015-07-17 Thread Chris Wilson
On Fri, Jul 17, 2015 at 05:54:20PM +0100, Siluvery, Arun wrote:
 On 17/07/2015 17:27, Chris Wilson wrote:
 On Fri, Jul 17, 2015 at 05:08:53PM +0100, Arun Siluvery wrote:
 This mode allows to assign EUs to pools.
 The command to enable this mode is sent in auxiliary golden context batch
 as this is only issued once with each context initialization. Thanks to
 Mika for the preliminary review.
 
 A quick explanation for why this has to be in the kernel would be nice.
 Privileged instruction?
 
 The purpose of the auxiliary batch is explained in patch 2, but I can
 add some explanation about this one also.

Here, I am looking for an explanation of why these commands in
particular are desired. Mika's short explanation that it must be the same
for all contexts on the system is sufficient.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v1 2/4] drm/i915: Add provision to extend Golden context batch

2015-07-17 Thread Mika Kuoppala
Chris Wilson ch...@chris-wilson.co.uk writes:

 On Fri, Jul 17, 2015 at 05:08:52PM +0100, Arun Siluvery wrote:
 The Golden batch carries 3D state at the beginning so that HW starts with
 a known state. It is carried as a binary blob which is auto-generated from
 source. The idea was it would be easier to maintain and keep the complexity
 out of the kernel which makes sense as we don't really touch it. However if
 you really need to update it then you need to update generator source and
 keep the binary blob in sync with it.
 
 There is a need to patch this in bxt to send one additional command to enable
 a feature. A solution was to patch the binary data with some additional
 data structures (included as part of auto-generator source) but it was
 unnecessarily complicated.
 
 Chris suggested the idea of having a secondary batch and execute two batch
 buffers. It has clear advantages as we needn't touch the base golden batch,
 can customize secondary/auxiliary batch depending on Gen and can be carried
 in the driver with no dependencies.
 
 This patch adds support for this auxiliary batch which is inserted at the
 end of golden batch and is completely independent from it. Thanks to Mika
 for the preliminary review.
 
 Cc: Mika Kuoppala mika.kuopp...@intel.com
 Cc: Chris Wilson ch...@chris-wilson.co.uk
 Cc: Armin Reese armin.c.re...@intel.com
 Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com
 ---
  drivers/gpu/drm/i915/i915_gem_render_state.c | 27 
 +++
  drivers/gpu/drm/i915/i915_gem_render_state.h |  2 ++
  drivers/gpu/drm/i915/intel_lrc.c |  6 ++
  3 files changed, 35 insertions(+)
 
 diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
 b/drivers/gpu/drm/i915/i915_gem_render_state.c
 index b6492fe..b86e382 100644
 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
 +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
 @@ -73,6 +73,15 @@ free_gem:
  return ret;
  }
  
 +#define OUT_BATCH(batch, i, val)\
 +do {\
 +if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {  \

 We have to be slightly more careful here, as we don't have the full page
 available since we put render state into the high arena of the golden
 bb. Something like WARN_ON(i >= PAGE_SIZE/sizeof(u32) || (batch)[i]) should
 suffice.


Null state gen makes the final batch in two passes. First
it builds the commands and state separately. And when the size of both
is known, it compacts by relocating the state right after
the commands (+ some alignment).

So we should have the rest of the page usable for auxiliary
commands here as we have already copied the state part
also.

-Mika

 @@ -110,6 +119,15 @@ static int render_state_setup(struct render_state *so)
  
  d[i++] = s;
  }
 +
 +while (i % CACHELINE_DWORDS)
 +OUT_BATCH(d, i, MI_NOOP);
 +
 +so->aux_batch_offset = i * sizeof(u32);
 +
 +OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
 +so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;

 Strictly, if we are passing the batch length and want to be strictly
 conformant, then the aux_batch_size must be a multiple of 8.

 +
  kunmap(page);
  
 ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
 @@ -128,6 +146,8 @@ err_out:
  return ret;
  }
  
 +#undef OUT_BATCH
 +
  void i915_gem_render_state_fini(struct render_state *so)
  {
 i915_gem_object_ggtt_unpin(so->obj);
 @@ -176,6 +196,13 @@ int i915_gem_render_state_init(struct 
 drm_i915_gem_request *req)
  if (ret)
  goto out;
  
 Then we need only execute this BB if so.aux_batch_size > 8

 +ret = req->ring->dispatch_execbuffer(req,
 + (so.ggtt_offset + so.aux_batch_offset),
 + so.aux_batch_size,
 + I915_DISPATCH_SECURE);
 +if (ret)
 +goto out;
 +
  i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
 -Chris

 -- 
 Chris Wilson, Intel Open Source Technology Centre
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v1 3/4] drm/i915:bxt: Enable Pooled EU support

2015-07-17 Thread Mika Kuoppala
Chris Wilson ch...@chris-wilson.co.uk writes:

 On Fri, Jul 17, 2015 at 05:08:53PM +0100, Arun Siluvery wrote:
 This mode allows to assign EUs to pools.
 The command to enable this mode is sent in auxiliary golden context batch
 as this is only issued once with each context initialization. Thanks to
 Mika for the preliminary review.

 A quick explanation for why this has to be in the kernel would be nice.
 Privileged instruction?


The pooled mode is global. Once set, it has to stay the same
across all contexts until a subsequent fw reset.

-Mika

 Not fond of the split between this and patch 4. Patch 4 introduces one
 feature flag that looks different to the one we use here to enable
 support.
 -Chris

 -- 
 Chris Wilson, Intel Open Source Technology Centre
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v1 3/4] drm/i915:bxt: Enable Pooled EU support

2015-07-17 Thread Siluvery, Arun

On 17/07/2015 17:27, Chris Wilson wrote:

On Fri, Jul 17, 2015 at 05:08:53PM +0100, Arun Siluvery wrote:

This mode allows to assign EUs to pools.
The command to enable this mode is sent in auxiliary golden context batch
as this is only issued once with each context initialization. Thanks to
Mika for the preliminary review.


A quick explanation for why this has to be in the kernel would be nice.
Privileged instruction?


The purpose of the auxiliary batch is explained in patch 2, but I can add 
some explanation about this one also.




Not fond of the split between this and patch 4. Patch 4 introduces one
feature flag that looks different to the one we use here to enable
support.
I will keep patch 4 separate as it deals with libdrm changes but will use
the feature flag in this one.


regards
Arun


-Chris



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 3/3] drm/i915:bxt: Enable Pooled EU support

2015-07-17 Thread Arun Siluvery
This mode allows EUs to be assigned to pools which can process work collectively.
The command to enable this mode should be issued as part of context
initialization.

The pooled mode is global; once enabled it has to stay the same across all
contexts until HW reset, hence it is sent in the auxiliary golden context batch.
Thanks to Mika for the preliminary review and comments.

v2: explain why this is enabled in golden context, use feature flag while
enabling the support (Chris)

v3: Pooled EU support announced in userspace before enabling in kernel,
to simplify include all changes in the same patch.

User space clients need to know when the pooled EU feature is present
and enabled on the hardware so that they can adapt work submissions.
Create a new device info flag for this purpose, and create a new GETPARAM
entry to allow user space to query its setting.

Set has_pooled_eu to true in the Broxton static device info - Broxton
supports the feature in hardware and the driver will enable it by
default.
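
For reference, a userspace query could look like this (a sketch using the
libdrm ioctl wrapper; this snippet is not part of the patch):

	#include <xf86drm.h>
	#include <i915_drm.h>

	/* Returns non-zero when the kernel reports pooled EU support. */
	static int has_pooled_eu(int fd)
	{
		drm_i915_getparam_t gp = { .param = I915_PARAM_HAS_POOLED_EU };
		int value = 0;

		gp.value = &value;
		if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
			return 0; /* older kernel: parameter unknown */
		return value;
	}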

Cc: Mika Kuoppala mika.kuopp...@intel.com
Cc: Chris Wilson ch...@chris-wilson.co.uk
Cc: Armin Reese armin.c.re...@intel.com
Signed-off-by: Jeff McGee jeff.mc...@intel.com
Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_dma.c  |  3 +++
 drivers/gpu/drm/i915/i915_drv.c  |  1 +
 drivers/gpu/drm/i915/i915_drv.h  |  5 -
 drivers/gpu/drm/i915/i915_gem_render_state.c | 13 +
 drivers/gpu/drm/i915/i915_reg.h  |  2 ++
 include/uapi/drm/i915_drm.h  |  1 +
 6 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 5e63076..6c31beb 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_RESOURCE_STREAMER:
value = HAS_RESOURCE_STREAMER(dev);
break;
+   case I915_PARAM_HAS_POOLED_EU:
+   value = HAS_POOLED_EU(dev);
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e44dc0d..213f74d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -389,6 +389,7 @@ static const struct intel_device_info intel_broxton_info = {
.num_pipes = 3,
.has_ddi = 1,
.has_fbc = 1,
+   .has_pooled_eu = 1,
GEN_DEFAULT_PIPEOFFSETS,
IVB_CURSOR_OFFSETS,
 };
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 768d1db..32850a8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -775,7 +775,8 @@ struct intel_csr {
func(supports_tv) sep \
func(has_llc) sep \
func(has_ddi) sep \
-   func(has_fpga_dbg)
+   func(has_fpga_dbg) sep \
+   func(has_pooled_eu)
 
 #define DEFINE_FLAG(name) u8 name:1
 #define SEP_SEMICOLON ;
@@ -2549,6 +2550,8 @@ struct drm_i915_cmd_table {
 #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \
INTEL_INFO(dev)->gen >= 8)
 
+#define HAS_POOLED_EU(dev) (INTEL_INFO(dev)->has_pooled_eu)
+
 #define INTEL_PCH_DEVICE_ID_MASK   0xffff0000
 #define INTEL_PCH_IBX_DEVICE_ID_TYPE   0x3b000000
 #define INTEL_PCH_CPT_DEVICE_ID_TYPE   0x1c000000
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 5026a62..8866040 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -93,6 +93,7 @@ free_gem:
 
 static int render_state_setup(struct render_state *so)
 {
+   struct drm_device *dev = so->obj->base.dev;
const struct intel_renderstate_rodata *rodata = so->rodata;
unsigned int i = 0, reloc_index = 0;
struct page *page;
@@ -134,6 +135,18 @@ static int render_state_setup(struct render_state *so)
 
so->aux_batch_offset = i * sizeof(u32);
 
+   if (HAS_POOLED_EU(dev)) {
+   u32 pool_config = (INTEL_INFO(dev)->subslice_total == 3 ?
+  0x00777000 : 0);
+
+   OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE);
+   OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE);
+   OUT_BATCH(d, i, pool_config);
+   OUT_BATCH(d, i, 0);
+   OUT_BATCH(d, i, 0);
+   OUT_BATCH(d, i, 0);
+   }
+
OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 9a2ffad..e052499 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -379,6 +379,8 @@
  */
 #define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) 

Re: [Intel-gfx] [PATCH v2 3/4] drm/i915/bxt: Add get_param to query Pooled EU availability

2015-07-17 Thread Siluvery, Arun

On 17/07/2015 19:13, Arun Siluvery wrote:

User space clients need to know when the pooled EU feature is present
and enabled on the hardware so that they can adapt work submissions.
Create a new device info flag for this purpose, and create a new GETPARAM
entry to allow user space to query its setting.

Set has_pooled_eu to true in the Broxton static device info - Broxton
supports the feature in hardware and the driver will enable it by
default.

Signed-off-by: Jeff McGee jeff.mc...@intel.com
Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com
---


Please ignore this patch; it is squashed with patch 4 "drm/i915:bxt:
Enable Pooled EU support" to keep all enabling changes in the same place,
otherwise we would've announced support to userspace before enabling it
in the kernel.


regards
Arun


  drivers/gpu/drm/i915/i915_dma.c | 3 +++
  drivers/gpu/drm/i915/i915_drv.c | 1 +
  drivers/gpu/drm/i915/i915_drv.h | 5 -
  include/uapi/drm/i915_drm.h | 1 +
  4 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 5e63076..6c31beb 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_RESOURCE_STREAMER:
value = HAS_RESOURCE_STREAMER(dev);
break;
+   case I915_PARAM_HAS_POOLED_EU:
+   value = HAS_POOLED_EU(dev);
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e44dc0d..213f74d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -389,6 +389,7 @@ static const struct intel_device_info intel_broxton_info = {
.num_pipes = 3,
.has_ddi = 1,
.has_fbc = 1,
+   .has_pooled_eu = 1,
GEN_DEFAULT_PIPEOFFSETS,
IVB_CURSOR_OFFSETS,
  };
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 768d1db..32850a8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -775,7 +775,8 @@ struct intel_csr {
func(supports_tv) sep \
func(has_llc) sep \
func(has_ddi) sep \
-   func(has_fpga_dbg)
+   func(has_fpga_dbg) sep \
+   func(has_pooled_eu)

  #define DEFINE_FLAG(name) u8 name:1
  #define SEP_SEMICOLON ;
@@ -2549,6 +2550,8 @@ struct drm_i915_cmd_table {
  #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \
INTEL_INFO(dev)->gen >= 8)

+#define HAS_POOLED_EU(dev) (INTEL_INFO(dev)->has_pooled_eu)
+
  #define INTEL_PCH_DEVICE_ID_MASK  0xffff0000
  #define INTEL_PCH_IBX_DEVICE_ID_TYPE  0x3b000000
  #define INTEL_PCH_CPT_DEVICE_ID_TYPE  0x1c000000
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e7c29f1..9649577 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
  #define I915_PARAM_EU_TOTAL 34
  #define I915_PARAM_HAS_GPU_RESET   35
  #define I915_PARAM_HAS_RESOURCE_STREAMER 36
+#define I915_PARAM_HAS_POOLED_EU 37

  typedef struct drm_i915_getparam {
int param;



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 3/4] drm/i915/bxt: Add get_param to query Pooled EU availability

2015-07-17 Thread Arun Siluvery
User space clients need to know when the pooled EU feature is present
and enabled on the hardware so that they can adapt work submissions.
Create a new device info flag for this purpose, and create a new GETPARAM
entry to allow user space to query its setting.

Set has_pooled_eu to true in the Broxton static device info - Broxton
supports the feature in hardware and the driver will enable it by
default.

Signed-off-by: Jeff McGee jeff.mc...@intel.com
Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_dma.c | 3 +++
 drivers/gpu/drm/i915/i915_drv.c | 1 +
 drivers/gpu/drm/i915/i915_drv.h | 5 -
 include/uapi/drm/i915_drm.h | 1 +
 4 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 5e63076..6c31beb 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_RESOURCE_STREAMER:
value = HAS_RESOURCE_STREAMER(dev);
break;
+   case I915_PARAM_HAS_POOLED_EU:
+   value = HAS_POOLED_EU(dev);
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e44dc0d..213f74d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -389,6 +389,7 @@ static const struct intel_device_info intel_broxton_info = {
.num_pipes = 3,
.has_ddi = 1,
.has_fbc = 1,
+   .has_pooled_eu = 1,
GEN_DEFAULT_PIPEOFFSETS,
IVB_CURSOR_OFFSETS,
 };
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 768d1db..32850a8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -775,7 +775,8 @@ struct intel_csr {
func(supports_tv) sep \
func(has_llc) sep \
func(has_ddi) sep \
-   func(has_fpga_dbg)
+   func(has_fpga_dbg) sep \
+   func(has_pooled_eu)
 
 #define DEFINE_FLAG(name) u8 name:1
 #define SEP_SEMICOLON ;
@@ -2549,6 +2550,8 @@ struct drm_i915_cmd_table {
 #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \
INTEL_INFO(dev)->gen >= 8)
 
+#define HAS_POOLED_EU(dev) (INTEL_INFO(dev)->has_pooled_eu)
+
 #define INTEL_PCH_DEVICE_ID_MASK   0xffff0000
 #define INTEL_PCH_IBX_DEVICE_ID_TYPE   0x3b000000
 #define INTEL_PCH_CPT_DEVICE_ID_TYPE   0x1c000000
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e7c29f1..9649577 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_EU_TOTAL 34
 #define I915_PARAM_HAS_GPU_RESET 35
 #define I915_PARAM_HAS_RESOURCE_STREAMER 36
+#define I915_PARAM_HAS_POOLED_EU 37
 
 typedef struct drm_i915_getparam {
int param;
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 2/4] drm/i915: Add provision to extend Golden context batch

2015-07-17 Thread Arun Siluvery
The Golden batch carries 3D state at the beginning so that HW starts with
a known state. It is carried as a binary blob which is auto-generated from
source. The idea was that it would be easier to maintain and keep the
complexity out of the kernel, which makes sense as we don't really touch it.
However, if you really need to update it then you need to update the
generator source and keep the binary blob in sync with it.

There is a need to patch this in bxt to send one additional command to enable
a feature. A solution was to patch the binary data with some additional
data structures (included as part of auto-generator source) but it was
unnecessarily complicated.

Chris suggested the idea of having a secondary batch and executing two batch
buffers. It has clear advantages: we needn't touch the base golden batch,
can customize the secondary/auxiliary batch depending on Gen, and it can be
carried in the driver with no dependencies.

This patch adds support for this auxiliary batch, which is inserted at the
end of the golden batch and is completely independent from it. Thanks to Mika
for the preliminary review.

v2: Strictly conform to the batch size requirements to cover Gen2 and
add comments to clarify overflow check in macro (Chris, Mika).

Cc: Mika Kuoppala mika.kuopp...@intel.com
Cc: Chris Wilson ch...@chris-wilson.co.uk
Cc: Armin Reese armin.c.re...@intel.com
Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_gem_render_state.c | 45 
 drivers/gpu/drm/i915/i915_gem_render_state.h |  2 ++
 drivers/gpu/drm/i915/intel_lrc.c |  6 
 3 files changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index b6492fe..5026a62 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -73,6 +73,24 @@ free_gem:
return ret;
 }
 
+/*
+ * Macro to add commands to the auxiliary batch.
+ * This macro only checks for page overflow before inserting the commands;
+ * this is sufficient as the null state generator makes the final batch
+ * in two passes, building the commands and state separately. At this point
+ * the size of both is known and it compacts them by relocating the state
+ * right after the commands, taking care of alignment, so we should have
+ * sufficient space below them for adding new commands.
+ */
+#define OUT_BATCH(batch, i, val)   \
+   do {\
+   if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {  \
+   ret = -ENOSPC;  \
+   goto err_out;   \
+   }   \
+   (batch)[(i)++] = (val); \
+   } while(0)
+
 static int render_state_setup(struct render_state *so)
 {
const struct intel_renderstate_rodata *rodata = so->rodata;
@@ -110,6 +128,21 @@ static int render_state_setup(struct render_state *so)
 
d[i++] = s;
}
+
+   while (i % CACHELINE_DWORDS)
+   OUT_BATCH(d, i, MI_NOOP);
+
+   so->aux_batch_offset = i * sizeof(u32);
+
+   OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
+   so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
+
+   /*
+* Since we are sending length, we need to strictly conform to
+* all requirements. For Gen2 this must be a multiple of 8.
+*/
+   so->aux_batch_size = ALIGN(so->aux_batch_size, 8);
+
kunmap(page);
 
ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
@@ -128,6 +161,8 @@ err_out:
return ret;
 }
 
+#undef OUT_BATCH
+
 void i915_gem_render_state_fini(struct render_state *so)
 {
i915_gem_object_ggtt_unpin(so->obj);
@@ -176,6 +211,16 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
if (ret)
goto out;
 
+   if (so.aux_batch_size > 8) {
+   ret = req->ring->dispatch_execbuffer(req,
+(so.ggtt_offset +
+ so.aux_batch_offset),
+so.aux_batch_size,
+I915_DISPATCH_SECURE);
+   if (ret)
+   goto out;
+   }
+
i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
 
 out:
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h 
b/drivers/gpu/drm/i915/i915_gem_render_state.h
index 7aa7372..79de101 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.h
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
@@ -37,6 +37,8 @@ struct render_state {
struct drm_i915_gem_object *obj;
u64 ggtt_offset;
int gen;
+   u32 aux_batch_size;
+   u64 aux_batch_offset;
 };
 
 int 

[Intel-gfx] [PATCH v2 0/4] Add Pooled EU support to BXT

2015-07-17 Thread Arun Siluvery
v1: http://lists.freedesktop.org/archives/intel-gfx/2015-July/071951.html

v2: auxiliary batch size must be a multiple of 8, use feature flag while
enabling support and add comments to clarify various things.

Resending all patches as the order is changed.

Arun Siluvery (3):
  drm/i915: Add provision to extend Golden context batch
  drm/i915/bxt: Add get_param to query Pooled EU availability
  drm/i915:bxt: Enable Pooled EU support

Mika Kuoppala (1):
  drm/i915: Do kunmap if renderstate parsing fails

 drivers/gpu/drm/i915/i915_dma.c  |  3 ++
 drivers/gpu/drm/i915/i915_drv.c  |  1 +
 drivers/gpu/drm/i915/i915_drv.h  |  5 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c | 70 +++-
 drivers/gpu/drm/i915/i915_gem_render_state.h |  2 +
 drivers/gpu/drm/i915/i915_reg.h  |  2 +
 drivers/gpu/drm/i915/intel_lrc.c |  6 +++
 include/uapi/drm/i915_drm.h  |  1 +
 8 files changed, 87 insertions(+), 3 deletions(-)

-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 4/4] drm/i915:bxt: Enable Pooled EU support

2015-07-17 Thread Arun Siluvery
This mode allows EUs to be assigned to pools which can process work collectively.
The command to enable this mode should be issued as part of context
initialization.

The pooled mode is global; once enabled it has to stay the same across all
contexts until HW reset, hence it is sent in the auxiliary golden context batch.
Thanks to Mika for the preliminary review and comments.

v2: explain why this is enabled in golden context, use feature flag while
enabling the support (Chris)

Cc: Mika Kuoppala mika.kuopp...@intel.com
Cc: Chris Wilson ch...@chris-wilson.co.uk
Cc: Armin Reese armin.c.re...@intel.com
Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_gem_render_state.c | 15 +++
 drivers/gpu/drm/i915/i915_reg.h  |  2 ++
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 5026a62..e4ff342 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -93,6 +93,7 @@ free_gem:
 
 static int render_state_setup(struct render_state *so)
 {
+   struct drm_device *dev = so->obj->base.dev;
const struct intel_renderstate_rodata *rodata = so->rodata;
unsigned int i = 0, reloc_index = 0;
struct page *page;
@@ -134,6 +135,20 @@ static int render_state_setup(struct render_state *so)
 
so->aux_batch_offset = i * sizeof(u32);
 
+   if (HAS_POOLED_EU(dev)) {
+   u32 pool_config = 0;
+   struct drm_i915_private *dev_priv = to_i915(dev);
+
+   OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE);
+   OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE);
+   if (dev_priv->info.subslice_total == 3)
+   pool_config = 0x00777000;
+   OUT_BATCH(d, i, pool_config);
+   OUT_BATCH(d, i, 0);
+   OUT_BATCH(d, i, 0);
+   OUT_BATCH(d, i, 0);
+   }
+
OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 9a2ffad..e052499 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -379,6 +379,8 @@
  */
 #define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags))
 
+#define GEN9_MEDIA_POOL_STATE ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4)
+#define   GEN9_MEDIA_POOL_ENABLE  (1 << 31)
 #define GFX_OP_RASTER_RULES((0x3<<29)|(0x7<<24))
 #define GFX_OP_SCISSOR ((0x3<<29)|(0x1c<<24)|(0x10<<19))
 #define   SC_UPDATE_SCISSOR   (0x1<<1)
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 1/4] drm/i915: Do kunmap if renderstate parsing fails

2015-07-17 Thread Arun Siluvery
From: Mika Kuoppala mika.kuopp...@linux.intel.com

Kunmap the renderstate page on error path.

Reviewed-by: Arun Siluvery arun.siluv...@linux.intel.com
Signed-off-by: Mika Kuoppala mika.kuopp...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_render_state.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index a0201fc..b6492fe 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -96,8 +96,10 @@ static int render_state_setup(struct render_state *so)
s = lower_32_bits(r);
if (so->gen >= 8) {
if (i + 1 >= rodata->batch_items ||
-   rodata->batch[i + 1] != 0)
-   return -EINVAL;
+   rodata->batch[i + 1] != 0) {
+   ret = -EINVAL;
+   goto err_out;
+   }
 
d[i++] = s;
s = upper_32_bits(r);
@@ -120,6 +122,10 @@ static int render_state_setup(struct render_state *so)
}
 
return 0;
+
+err_out:
+   kunmap(page);
+   return ret;
 }
 
 void i915_gem_render_state_fini(struct render_state *so)
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 4/4] drm/i915:bxt: Enable Pooled EU support

2015-07-17 Thread Chris Wilson
On Fri, Jul 17, 2015 at 07:13:34PM +0100, Arun Siluvery wrote:
 This mode allows to assign EUs to pools which can process work collectively.
 The command to enable this mode should be issued as part of context 
 initialization.
 
 The pooled mode is global, once enabled it has to stay the same across all
 contexts until HW reset hence this is sent in auxiliary golden context batch.
 Thanks to Mika for the preliminary review and comments.
 
 v2: explain why this is enabled in golden context, use feature flag while
 enabling the support (Chris)

You fell into the trap of telling userspace this was set up before we
actually do so.
 
 Cc: Mika Kuoppala mika.kuopp...@intel.com
 Cc: Chris Wilson ch...@chris-wilson.co.uk
 Cc: Armin Reese armin.c.re...@intel.com
 Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com
 ---
  drivers/gpu/drm/i915/i915_gem_render_state.c | 15 +++
  drivers/gpu/drm/i915/i915_reg.h  |  2 ++
  2 files changed, 17 insertions(+)
 
 diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
 b/drivers/gpu/drm/i915/i915_gem_render_state.c
 index 5026a62..e4ff342 100644
 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
 +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
 @@ -93,6 +93,7 @@ free_gem:
  
  static int render_state_setup(struct render_state *so)
  {
 + struct drm_device *dev = so->obj->base.dev;
   const struct intel_renderstate_rodata *rodata = so->rodata;
   unsigned int i = 0, reloc_index = 0;
   struct page *page;
 @@ -134,6 +135,20 @@ static int render_state_setup(struct render_state *so)
  
   so->aux_batch_offset = i * sizeof(u32);
  
 + if (HAS_POOLED_EU(dev)) {
 + u32 pool_config = 0;
 + struct drm_i915_private *dev_priv = to_i915(dev);

Just a minor, as this would be neater as

u32 pool_config =
INTEL_INFO(dev)->subslice_total == 3 ? 0x00777000 : 0;

At the very least keep both paths to set pool_config next to each other,
e.g.
u32 pool_config;
...
pool_config = 0;
if (INTEL_INFO(dev)->subslice_total == 3)
pool_config = 0x00777000;

Then we just have

 + OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE);
 + OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE);
 + OUT_BATCH(d, i, pool_config);
 + OUT_BATCH(d, i, 0);
 + OUT_BATCH(d, i, 0);
 + OUT_BATCH(d, i, 0);

Which is much easier to read.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 04/13 v4] drm/i915: GuC-specific firmware loader

2015-07-17 Thread O'Rourke, Tom
On Thu, Jul 09, 2015 at 07:29:05PM +0100, Dave Gordon wrote:
 From: Alex Dai yu@intel.com
 
 This fetches the required firmware image from the filesystem,
 then loads it into the GuC's memory via a dedicated DMA engine.
 
 This patch is derived from GuC loading work originally done by
 Vinit Azad and Ben Widawsky.
 
 v2:
 Various improvements per review comments by Chris Wilson
 
 v3:
 Removed 'wait' parameter to intel_guc_ucode_load() as firmware
 prefetch is no longer supported in the common firmware loader,
   per Daniel Vetter's request.
 Firmware checker callback fn now returns errno rather than bool.
 
 v4:
 Squash uC-independent code into GuC-specific loader [Daniel Vetter]
 Don't keep the driver working (by falling back to execlist mode)
 if GuC firmware loading fails [Daniel Vetter]
 
 Issue: VIZ-4884
 Signed-off-by: Alex Dai yu@intel.com
 Signed-off-by: Dave Gordon david.s.gor...@intel.com
 ---
  drivers/gpu/drm/i915/Makefile   |   3 +
  drivers/gpu/drm/i915/i915_dma.c |   4 +
  drivers/gpu/drm/i915/i915_drv.h |  11 +
  drivers/gpu/drm/i915/i915_gem.c |  13 +
  drivers/gpu/drm/i915/i915_reg.h |   4 +-
  drivers/gpu/drm/i915/intel_guc.h|  67 
  drivers/gpu/drm/i915/intel_guc_loader.c | 536 
 
  7 files changed, 637 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/i915/intel_guc.h
  create mode 100644 drivers/gpu/drm/i915/intel_guc_loader.c
 
 diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
 index de21965..e604cfe 100644
 --- a/drivers/gpu/drm/i915/Makefile
 +++ b/drivers/gpu/drm/i915/Makefile
 @@ -39,6 +39,9 @@ i915-y += i915_cmd_parser.o \
 intel_ringbuffer.o \
 intel_uncore.o
  
 +# general-purpose microcontroller (GuC) support
 +i915-y += intel_guc_loader.o
 +
  # autogenerated null render state
  i915-y += intel_renderstate_gen6.o \
 intel_renderstate_gen7.o \
 diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
 index 066c34c..958ab4f 100644
 --- a/drivers/gpu/drm/i915/i915_dma.c
 +++ b/drivers/gpu/drm/i915/i915_dma.c
 @@ -472,6 +472,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
  
  cleanup_gem:
   mutex_lock(&dev->struct_mutex);
 + intel_guc_ucode_fini(dev);
   i915_gem_cleanup_ringbuffer(dev);
   i915_gem_context_fini(dev);
   mutex_unlock(&dev->struct_mutex);
 @@ -869,6 +870,8 @@ int i915_driver_load(struct drm_device *dev, unsigned 
 long flags)
  
   intel_uncore_init(dev);
  
 + intel_guc_ucode_init(dev);
 +
   /* Load CSR Firmware for SKL */
   intel_csr_ucode_init(dev);
  
 @@ -1120,6 +1123,7 @@ int i915_driver_unload(struct drm_device *dev)
   flush_workqueue(dev_priv->wq);
  
   mutex_lock(&dev->struct_mutex);
 + intel_guc_ucode_fini(dev);
   i915_gem_cleanup_ringbuffer(dev);
   i915_gem_context_fini(dev);
   mutex_unlock(&dev->struct_mutex);
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index 4a512da..15b9202 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -50,6 +50,7 @@
  #include <linux/intel-iommu.h>
  #include <linux/kref.h>
  #include <linux/pm_qos.h>
 +#include "intel_guc.h"
  
  /* General customization:
   */
 @@ -1694,6 +1695,8 @@ struct drm_i915_private {
  
   struct i915_virtual_gpu vgpu;
  
 + struct intel_guc guc;
 +
   struct intel_csr csr;
  
   /* Display CSR-related protection */
 @@ -1938,6 +1941,11 @@ static inline struct drm_i915_private 
 *dev_to_i915(struct device *dev)
   return to_i915(dev_get_drvdata(dev));
  }
  
 +static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
 +{
 + return container_of(guc, struct drm_i915_private, guc);
 +}
 +
  /* Iterate over initialised rings */
  #define for_each_ring(ring__, dev_priv__, i__) \
   for ((i__) = 0; (i__) < I915_NUM_RINGS; (i__)++) \
 @@ -2543,6 +2551,9 @@ struct drm_i915_cmd_table {
  
  #define HAS_CSR(dev) (IS_SKYLAKE(dev))
  
 +#define HAS_GUC_UCODE(dev)   (IS_GEN9(dev))
 +#define HAS_GUC_SCHED(dev)   (IS_GEN9(dev))
 +
  #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \
   INTEL_INFO(dev)->gen >= 8)
  
 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
 index dbbb649..e020309 100644
 --- a/drivers/gpu/drm/i915/i915_gem.c
 +++ b/drivers/gpu/drm/i915/i915_gem.c
 @@ -5074,6 +5074,19 @@ i915_gem_init_hw(struct drm_device *dev)
   goto out;
   }
  
 + /* We can't enable contexts until all firmware is loaded */
 + ret = intel_guc_ucode_load(dev);
 +
 + /*
 +  * If we got an error and GuC submission is enabled, map
 +  * the error to -EIO so the GPU will be declared wedged.
 +  * OTOH, if we didn't intend to use the GuC anyway, just
 +  * discard the error and carry on.
 +  */
 + ret = 

Re: [Intel-gfx] [PATCH 01/13 v4] drm/i915: Add i915_gem_object_create_from_data()

2015-07-17 Thread O'Rourke, Tom
On Thu, Jul 09, 2015 at 07:29:02PM +0100, Dave Gordon wrote:
 i915_gem_object_create_from_data() is a generic function to save data
 from a plain linear buffer in a new pageable gem object that can later
 be accessed by the CPU and/or GPU.
 
 We will need this for the microcontroller firmware loading support code.
 
 Derived from i915_gem_object_write(), originally by Alex Dai
 
 v2:
 Change of function: now allocates & fills a new object, rather than
 writing to an existing object
 New name courtesy of Chris Wilson
 Explicit domain-setting and other improvements per review comments
 by Chris Wilson & Daniel Vetter
 
 v4:
 Rebased
 
 Issue: VIZ-4884
 Signed-off-by: Alex Dai yu@intel.com
 Signed-off-by: Dave Gordon david.s.gor...@intel.com
 ---
Reviewed-by: Tom O'Rourke Tom.O'rou...@intel.com
  drivers/gpu/drm/i915/i915_drv.h |  2 ++
  drivers/gpu/drm/i915/i915_gem.c | 40 
  2 files changed, 42 insertions(+)
 
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index 464b28d..3c91507 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -2755,6 +2755,8 @@ void i915_gem_object_init(struct drm_i915_gem_object 
 *obj,
const struct drm_i915_gem_object_ops *ops);
  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 size_t size);
 +struct drm_i915_gem_object *i915_gem_object_create_from_data(
 + struct drm_device *dev, const void *data, size_t size);
  void i915_init_vm(struct drm_i915_private *dev_priv,
 struct i915_address_space *vm);
  void i915_gem_free_object(struct drm_gem_object *obj);
 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
 index a0bff41..dbbb649 100644
 --- a/drivers/gpu/drm/i915/i915_gem.c
 +++ b/drivers/gpu/drm/i915/i915_gem.c
 @@ -5478,3 +5478,43 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object 
 *obj)
  
   return false;
  }
 +
 +/* Allocate a new GEM object and fill it with the supplied data */
 +struct drm_i915_gem_object *
 +i915_gem_object_create_from_data(struct drm_device *dev,
 +  const void *data, size_t size)
 +{
 + struct drm_i915_gem_object *obj;
 + struct sg_table *sg;
 + size_t bytes;
 + int ret;
 +
 + obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
 + if (IS_ERR_OR_NULL(obj))
 + return obj;
 +
 + ret = i915_gem_object_set_to_cpu_domain(obj, true);
 + if (ret)
 + goto fail;
 +
 + ret = i915_gem_object_get_pages(obj);
 + if (ret)
 + goto fail;
 +
 + i915_gem_object_pin_pages(obj);
 + sg = obj->pages;
 + bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
 + i915_gem_object_unpin_pages(obj);
 +
 + if (WARN_ON(bytes != size)) {
 + DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
 + ret = -EFAULT;
 + goto fail;
 + }
 +
 + return obj;
 +
 +fail:
 + drm_gem_object_unreference(&obj->base);
 + return ERR_PTR(ret);
 +}
 -- 
 1.9.1
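
As a usage illustration, a hypothetical caller sketch (not part of this
patch; the real consumer is the firmware loader later in the series, and
names like example_wrap_blob are invented here). It shows how the helper's
NULL-or-ERR_PTR() failure contract is meant to be consumed:

/* Assumes <linux/firmware.h> and the i915 headers. */
static int example_wrap_blob(struct drm_device *dev,
			     const struct firmware *fw,
			     struct drm_i915_gem_object **out)
{
	struct drm_i915_gem_object *obj;

	/* Copy the linear blob into a new pageable GEM object */
	obj = i915_gem_object_create_from_data(dev, fw->data, fw->size);
	if (IS_ERR_OR_NULL(obj))
		return obj ? PTR_ERR(obj) : -ENOMEM;

	*out = obj;
	return 0;
}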
 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 03/13 v4] drm/i915: Add GuC-related header files

2015-07-17 Thread O'Rourke, Tom
On Thu, Jul 09, 2015 at 07:29:04PM +0100, Dave Gordon wrote:
 intel_guc_fwif.h contains the subset of the GuC interface that we
 will need for submission of commands through the GuC. These MUST
 be kept in sync with the definitions used by the GuC firmware, and
 updates to this file will (or should) be autogenerated from the
 source files used to build the firmware. Editing this file is
 therefore not recommended.
 
 i915_guc_reg.h contains definitions of GuC-related hardware:
 registers, bitmasks, etc. These should match the BSpec.
 
 v2:
 Files renamed & resliced per review comments by Chris Wilson
 
 v4:
 Added DON'T-EDIT-ME warning [Tom O'Rourke]
 
 Issue: VIZ-4884
 Signed-off-by: Alex Dai yu@intel.com
 Signed-off-by: Dave Gordon david.s.gor...@intel.com
 ---
Reviewed-by: Tom O'Rourke Tom.O'rou...@intel.com

  drivers/gpu/drm/i915/i915_guc_reg.h   | 102 ++
  drivers/gpu/drm/i915/intel_guc_fwif.h | 245 
 ++
  2 files changed, 347 insertions(+)
  create mode 100644 drivers/gpu/drm/i915/i915_guc_reg.h
  create mode 100644 drivers/gpu/drm/i915/intel_guc_fwif.h
 
 diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h 
 b/drivers/gpu/drm/i915/i915_guc_reg.h
 new file mode 100644
 index 000..ccdc6c8
 --- /dev/null
 +++ b/drivers/gpu/drm/i915/i915_guc_reg.h
 @@ -0,0 +1,102 @@
 +/*
 + * Copyright © 2014 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 DEALINGS
 + * IN THE SOFTWARE.
 + *
 + */
 +#ifndef _I915_GUC_REG_H_
 +#define _I915_GUC_REG_H_
 +
 +/* Definitions of GuC H/W registers, bits, etc */
 +
 +#define GUC_STATUS   0xc000
 +#define   GS_BOOTROM_SHIFT   1
 +#define   GS_BOOTROM_MASK  (0x7F << GS_BOOTROM_SHIFT)
 +#define   GS_BOOTROM_RSA_FAILED  (0x50 << GS_BOOTROM_SHIFT)
 +#define   GS_UKERNEL_SHIFT   8
 +#define   GS_UKERNEL_MASK  (0xFF << GS_UKERNEL_SHIFT)
 +#define   GS_UKERNEL_LAPIC_DONE  (0x30 << GS_UKERNEL_SHIFT)
 +#define   GS_UKERNEL_DPC_ERROR (0x60 << GS_UKERNEL_SHIFT)
 +#define   GS_UKERNEL_READY (0xF0 << GS_UKERNEL_SHIFT)
 +#define   GS_MIA_SHIFT   16
 +#define   GS_MIA_MASK  (0x07 << GS_MIA_SHIFT)
 +
 +#define GUC_WOPCM_SIZE   0xc050
 +#define   GUC_WOPCM_SIZE_VALUE (0x80 << 12)  /* 512KB */
 +#define GUC_WOPCM_OFFSET 0x80000 /* 512KB */
 +
 +#define SOFT_SCRATCH(n)  (0xc180 + ((n) * 4))
 +
 +#define UOS_RSA_SCRATCH_0  0xc200
 +#define DMA_ADDR_0_LOW   0xc300
 +#define DMA_ADDR_0_HIGH  0xc304
 +#define DMA_ADDR_1_LOW   0xc308
 +#define DMA_ADDR_1_HIGH  0xc30c
 +#define   DMA_ADDRESS_SPACE_WOPCM  (7 << 16)
 +#define   DMA_ADDRESS_SPACE_GTT  (8 << 16)
 +#define DMA_COPY_SIZE  0xc310
 +#define DMA_CTRL 0xc314
 +#define   UOS_MOVE (1<<4)
 +#define   START_DMA  (1<<0)
 +#define DMA_GUC_WOPCM_OFFSET 0xc340
 +
 +#define GEN8_GT_PM_CONFIG  0x138140
 +#define GEN9_GT_PM_CONFIG  0x13816c
 +#define   GEN8_GT_DOORBELL_ENABLE  (1<<0)
 +
 +#define GEN8_GTCR  0x4274
 +#define   GEN8_GTCR_INVALIDATE (1<<0)
 +
 +#define GUC_ARAT_C6DIS   0xA178
 +
 +#define GUC_SHIM_CONTROL 0xc064
 +#define   GUC_DISABLE_SRAM_INIT_TO_ZEROES  (1<<0)
 +#define   GUC_ENABLE_READ_CACHE_LOGIC  (1<<1)
 +#define   GUC_ENABLE_MIA_CACHING (1<<2)
 +#define   GUC_GEN10_MSGCH_ENABLE (1<<4)
 +#define   GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA  (1<<9)
 +#define   GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA   (1<<10)
 +#define   GUC_ENABLE_MIA_CLOCK_GATING  (1<<15)
 +#define   GUC_GEN10_SHIM_WC_ENABLE   

Re: [Intel-gfx] [PATCH 02/13 v4] drm/i915: Add GuC-related module parameters

2015-07-17 Thread O'Rourke, Tom
On Thu, Jul 09, 2015 at 07:29:03PM +0100, Dave Gordon wrote:
 From: Alex Dai yu@intel.com
 
 Two new module parameters: enable_guc_submission which will turn
 on submission of batchbuffers via the GuC (when implemented), and
 guc_log_level which controls the level of debugging logged by the
 GuC and captured by the host.
 
 Signed-off-by: Alex Dai yu@intel.com
 
 v4:
 Mark enable_guc_submission unsafe [Daniel Vetter]
 
 Signed-off-by: Dave Gordon david.s.gor...@intel.com
 ---
Reviewed-by: Tom O'Rourke Tom.O'rou...@intel.com

  drivers/gpu/drm/i915/i915_drv.h| 2 ++
  drivers/gpu/drm/i915/i915_params.c | 9 +
  2 files changed, 11 insertions(+)
 
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index 3c91507..4a512da 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -2606,6 +2606,8 @@ struct i915_params {
   bool reset;
   bool disable_display;
   bool disable_vtd_wa;
 + bool enable_guc_submission;
 + int guc_log_level;
   int use_mmio_flip;
   int mmio_debug;
   bool verbose_state_checks;
 diff --git a/drivers/gpu/drm/i915/i915_params.c 
 b/drivers/gpu/drm/i915/i915_params.c
 index 7983fe4..2791b5a 100644
 --- a/drivers/gpu/drm/i915/i915_params.c
 +++ b/drivers/gpu/drm/i915/i915_params.c
 @@ -53,6 +53,8 @@ struct i915_params i915 __read_mostly = {
   .verbose_state_checks = 1,
   .nuclear_pageflip = 0,
   .edp_vswing = 0,
 + .enable_guc_submission = false,
 + .guc_log_level = -1,
  };
  
  module_param_named(modeset, i915.modeset, int, 0400);
 @@ -186,3 +188,10 @@ MODULE_PARM_DESC(edp_vswing,
	 "Ignore/Override vswing pre-emph table selection from VBT "
	 "(0=use value from vbt [default], 1=low power swing(200mV),"
	 "2=default swing(400mV))");
 +
 +module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, 
 bool, 0400);
 +MODULE_PARM_DESC(enable_guc_submission, "Enable GuC submission (default:false)");
 +
 +module_param_named(guc_log_level, i915.guc_log_level, int, 0400);
 +MODULE_PARM_DESC(guc_log_level,
 + "GuC firmware logging level (-1:disabled (default), 0-3:enabled)");
 -- 
 1.9.1
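
As a usage note (not part of the patch): with these defaults the GuC path
stays off. Someone experimenting with it would set the parameters at module
load, e.g. "modprobe i915 enable_guc_submission=1 guc_log_level=3" --
illustrative values only. The 0400 permissions mean neither parameter can
be changed after load.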
 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 00/13 v4] Batch submission via GuC

2015-07-17 Thread O'Rourke, Tom
On Thu, Jul 09, 2015 at 07:29:01PM +0100, Dave Gordon wrote:
 This patch series enables command submission via the GuC. In this mode,
 instead of the host CPU driving the execlist port directly, it hands
 over work items to the GuC, using a doorbell mechanism to tell the GuC
 that new items have been added to its work queue. The GuC then dispatches
 contexts to the various GPU engines, and manages the resulting context-
 switch interrupts. Completion of a batch is however still signalled to
 the CPU; the GuC is not involved in handling user interrupts.
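
To picture the doorbell handshake, a minimal sketch; the structure layout
and names here are hypothetical (the real work-queue item format is defined
by the GuC firmware interface, intel_guc_fwif.h, later in the series):

#include <linux/io.h>		/* writel() */
#include <linux/types.h>

struct guc_wq_example {
	u32 *items;		/* shared ring of work-queue items */
	u32 tail;		/* next free slot, owned by the host */
	u32 size;		/* number of slots in the ring */
	void __iomem *doorbell;	/* MMIO cacheline the GuC watches */
};

/* Hypothetical submission path: publish the item, then ring the bell. */
static void guc_wq_submit_example(struct guc_wq_example *wq, u32 item)
{
	wq->items[wq->tail] = item;
	wq->tail = (wq->tail + 1) % wq->size;
	wmb();				/* item must be visible before the ring */
	writel(wq->tail, wq->doorbell);	/* tell the GuC new work is queued */
}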
 
 There are two subsequences within the patch series:
 
   drm/i915: Add i915_gem_object_create_from_data()
   drm/i915: Add GuC-related module parameters
   drm/i915: Add GuC-related header files
   drm/i915: GuC-specific firmware loader
   drm/i915: Debugfs interface to read GuC load status
 
 These five patches make up the GuC loader and its prerequisites.  At this
 point in the sequence we can load and activate the GuC firmware, but not
 submit any batches through it. (This is nonetheless a potentially useful
 state, as the GuC could do other useful work even when not handling batch
 submissions).
 
   drm/i915: Expose two LRC functions for GuC submission mode
   drm/i915: GuC submission setup, phase 1
   drm/i915: Enable GuC firmware log
   drm/i915: Implementation of GuC client
   drm/i915: Interrupt routing for GuC submission
   drm/i915: Integrate GuC-based command submission
   drm/i915: Debugfs interface for GuC submission statistics
   drm/i915: Enable GuC submission, where supported
 
 In this second section, we implement the GuC submission mechanism, link
 it into the (execlist-based) submission path, and finally enable it
 (on supported platforms). On platforms where there is no GuC, or if
 GuC submission is explicitly disabled, batch submission will revert to
 using the execlist mechanism directly.
 
 On the other hand, if the GuC firmware cannot be found or is invalid,
 the GPU will be unusable.
 
 The GuC firmware itself is not included in this patchset; it is or will
 be available for download from https://01.org/linuxgraphics/downloads/
 This driver works with and requires GuC firmware revision 3.x. It will
 not work with any firmware version 1.x, as the GuC protocol in those
 revisions was incompatible and is no longer supported.

[TOR:] I finished reviewing the first 5 patches for GuC
firmware loading.  These patches look ready to go.
Should we wait until the GuC version 3 firmware is
available from 01.org before merging?

I am still working on the second section for GuC submission.

Thanks,
Tom
 
 Ben Widawsky (0):
 Vinit Azad (0):
 Michael H. Nguyen (0):
   created the original versions on which some of these patches are based.
 
 Alex Dai (6):
   drm/i915: Add GuC-related module parameters
   drm/i915: GuC-specific firmware loader
   drm/i915: Debugfs interface to read GuC load status
   drm/i915: GuC submission setup, phase 1
   drm/i915: Enable GuC firmware log
   drm/i915: Integrate GuC-based command submission
 
 Dave Gordon (7):
   drm/i915: Add i915_gem_object_create_from_data()
   drm/i915: Add GuC-related header files
   drm/i915: Expose two LRC functions for GuC submission mode
   drm/i915: Implementation of GuC client
   drm/i915: Interrupt routing for GuC submission
   drm/i915: Debugfs interface for GuC submission statistics
   drm/i915: Enable GuC submission, where supported
 
  Documentation/DocBook/drm.tmpl |  14 +
  drivers/gpu/drm/i915/Makefile  |   4 +
  drivers/gpu/drm/i915/i915_debugfs.c| 110 +++-
  drivers/gpu/drm/i915/i915_dma.c|   4 +
  drivers/gpu/drm/i915/i915_drv.h|  15 +
  drivers/gpu/drm/i915/i915_gem.c|  53 ++
  drivers/gpu/drm/i915/i915_guc_reg.h| 102 
  drivers/gpu/drm/i915/i915_guc_submission.c | 853 
 +
  drivers/gpu/drm/i915/i915_params.c |   9 +
  drivers/gpu/drm/i915/i915_reg.h|  15 +-
  drivers/gpu/drm/i915/intel_guc.h   | 118 
  drivers/gpu/drm/i915/intel_guc_fwif.h  | 245 +
  drivers/gpu/drm/i915/intel_guc_loader.c| 618 +
  drivers/gpu/drm/i915/intel_lrc.c   |  72 ++-
  drivers/gpu/drm/i915/intel_lrc.h   |   9 +
  15 files changed, 2211 insertions(+), 30 deletions(-)
  create mode 100644 drivers/gpu/drm/i915/i915_guc_reg.h
  create mode 100644 drivers/gpu/drm/i915/i915_guc_submission.c
  create mode 100644 drivers/gpu/drm/i915/intel_guc.h
  create mode 100644 drivers/gpu/drm/i915/intel_guc_fwif.h
  create mode 100644 drivers/gpu/drm/i915/intel_guc_loader.c
 
 -- 
 1.9.1
 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] Updated drm-intel-testing

2015-07-17 Thread Daniel Vetter
Hi all,

New -testing cycle with cool stuff:
- prelim hw support dropped for skl after Damien fixed an ABI issue around
  planes
- legacy modesetting is done using atomic infrastructure now (Maarten)!
- more gen9 workarounds (Arun&Nick)
- MOCS programming (cache control for better performance) for skl/bxt
- vlv/chv dpll improvements (Ville)
- PSR fixes from Rodrigo
- fbc improvements from Paulo
- plumb requests into execlist submit functions (Mika)
- opregion code cleanup from Jani
- resource streamer support from Abdiel for mesa
- final fixes for 12bpc hdmi + enabling support from Ville

Happy testing!

Cheers, Daniel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 2/4] drm/i915: Add provision to extend Golden context batch

2015-07-17 Thread Chris Wilson
On Fri, Jul 17, 2015 at 07:13:32PM +0100, Arun Siluvery wrote:
 The Golden batch carries 3D state at the beginning so that HW starts with
 a known state. It is carried as a binary blob which is auto-generated from
 source. The idea was that it would be easier to maintain and would keep the
 complexity out of the kernel, which makes sense as we don't really touch it.
 However, if you really need to update it, you must update the generator
 source and keep the binary blob in sync with it.
 
 There is a need to patch this in bxt to send one additional command to enable
 a feature. A solution was to patch the binary data with some additional
 data structures (included as part of auto-generator source) but it was
 unnecessarily complicated.
 
 Chris suggested the idea of having a secondary batch and executing two batch
 buffers. It has clear advantages: we needn't touch the base golden batch, we
 can customize the secondary/auxiliary batch depending on Gen, and it can be
 carried in the driver with no dependencies.
 
 This patch adds support for this auxiliary batch, which is inserted at the
 end of the golden batch and is completely independent of it. Thanks to Mika
 for the preliminary review.
 
 v2: Strictly conform to the batch size requirements to cover Gen2 and
 add comments to clarify overflow check in macro (Chris, Mika).
 
 Cc: Mika Kuoppala mika.kuopp...@intel.com
 Cc: Chris Wilson ch...@chris-wilson.co.uk
 Cc: Armin Reese armin.c.re...@intel.com
 Signed-off-by: Arun Siluvery arun.siluv...@linux.intel.com
 ---
  drivers/gpu/drm/i915/i915_gem_render_state.c | 45 
 
  drivers/gpu/drm/i915/i915_gem_render_state.h |  2 ++
  drivers/gpu/drm/i915/intel_lrc.c |  6 
  3 files changed, 53 insertions(+)
 
 diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
 b/drivers/gpu/drm/i915/i915_gem_render_state.c
 index b6492fe..5026a62 100644
 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
 +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
 @@ -73,6 +73,24 @@ free_gem:
   return ret;
  }
  
 +/*
 + * Macro to add commands to the auxiliary batch.
 + * This macro only checks for page overflow before inserting the commands;
 + * this is sufficient as the null state generator makes the final batch
 + * with two passes to build command and state separately. At this point
 + * the size of both is known and it compacts them by relocating the state
 + * right after the commands, taking care of alignment, so we should have
 + * sufficient space below them for adding new commands.
 + */
 +#define OUT_BATCH(batch, i, val) \
 + do {\
 + if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {  \
 + ret = -ENOSPC;  \
 + goto err_out;   \
 + }   \
 + (batch)[(i)++] = (val); \
 + } while (0)
 +
  static int render_state_setup(struct render_state *so)
  {
   const struct intel_renderstate_rodata *rodata = so->rodata;
 @@ -110,6 +128,21 @@ static int render_state_setup(struct render_state *so)
  
   d[i++] = s;
   }
 +
 + while (i % CACHELINE_DWORDS)
 + OUT_BATCH(d, i, MI_NOOP);
 +
 + so->aux_batch_offset = i * sizeof(u32);
 +
 + OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
 + so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
 +
 + /*
 +  * Since we are sending length, we need to strictly conform to
 +  * all requirements. For Gen2 this must be a multiple of 8.
 +  */
 + so->aux_batch_size = ALIGN(so->aux_batch_size, 8);
 +
   kunmap(page);
  
   ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
 @@ -128,6 +161,8 @@ err_out:
   return ret;
  }
  
 +#undef OUT_BATCH
 +
  void i915_gem_render_state_fini(struct render_state *so)
  {
   i915_gem_object_ggtt_unpin(so->obj);
 @@ -176,6 +211,16 @@ int i915_gem_render_state_init(struct 
 drm_i915_gem_request *req)
   if (ret)
   goto out;
  
 + if (so.aux_batch_size > 8) {
 + ret = req->ring->dispatch_execbuffer(req,
 +  (so.ggtt_offset +
 +   so.aux_batch_offset),
 +  so.aux_batch_size,
 +  I915_DISPATCH_SECURE);
 + if (ret)
 + goto out;
 + }
 +
   i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
  
  out:
 diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h 
 b/drivers/gpu/drm/i915/i915_gem_render_state.h
 index 7aa7372..79de101 100644
 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h
 +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
 @@ -37,6 +37,8 @@ struct render_state {
   struct drm_i915_gem_object *obj;
   u64 ggtt_offset;
   

Re: [Intel-gfx] [PATCH] drm/i915/bxt: WA for swapped HPD pins in A stepping

2015-07-17 Thread Imre Deak
On Fri, 2015-07-17 at 13:47 +0530, Sonika Jindal wrote:
 As per bspec, on BXT A0/A1, sw needs to activate DDIA HPD logic
 and interrupts to check the external panel connection and DDIC HPD
 logic for edp panel.
 
 v2: For DP, irq_port is used to determine the encoder instead of
 hpd_pin and removing the edp HPD logic because port A HPD is not
 present (Imre)
 
 Signed-off-by: Sonika Jindal sonika.jin...@intel.com
 ---
  drivers/gpu/drm/i915/intel_ddi.c  |   10 +-
  drivers/gpu/drm/i915/intel_hdmi.c |9 -
  2 files changed, 17 insertions(+), 2 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/intel_ddi.c 
 b/drivers/gpu/drm/i915/intel_ddi.c
 index e2c6f73..777e3a3 100644
 --- a/drivers/gpu/drm/i915/intel_ddi.c
 +++ b/drivers/gpu/drm/i915/intel_ddi.c
 @@ -3225,7 +3225,15 @@ void intel_ddi_init(struct drm_device *dev, enum port 
 port)
   goto err;
  
   intel_dig_port->hpd_pulse = intel_dp_hpd_pulse;
 - dev_priv->hotplug.irq_port[port] = intel_dig_port;
 + /*
 +  * On BXT A0/A1, sw needs to activate DDIA HPD logic and
 +  * interrupts to check the external panel connection.
 +  */
 + if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0) &&
 +     port == PORT_B)
 + dev_priv->hotplug.irq_port[PORT_A] = intel_dig_port;

This happens to work but is confusing. irq_port[PORT_A] will be set here
already and the above will simply overwrite it without explanation. I
would also handle the port == PORT_A case and not set irq_port for it.

The same swapping for hpd_pin is missing from intel_dp_init_connector().

 + else
 + dev_priv->hotplug.irq_port[port] = intel_dig_port;
   }
  
   /* In theory we don't need the encoder-type check, but leave it just in
 diff --git a/drivers/gpu/drm/i915/intel_hdmi.c 
 b/drivers/gpu/drm/i915/intel_hdmi.c
 index 70bad5b..94fa716 100644
 --- a/drivers/gpu/drm/i915/intel_hdmi.c
 +++ b/drivers/gpu/drm/i915/intel_hdmi.c
 @@ -1973,7 +1973,14 @@ void intel_hdmi_init_connector(struct 
 intel_digital_port *intel_dig_port,
   intel_hdmi->ddc_bus = GMBUS_PIN_1_BXT;
   else
   intel_hdmi->ddc_bus = GMBUS_PIN_DPB;
 - intel_encoder->hpd_pin = HPD_PORT_B;
 + /*
 +  * On BXT A0/A1, sw needs to activate DDIA HPD logic and
 +  * interrupts to check the external panel connection.
 +  */
 + if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0))
 + intel_encoder->hpd_pin = HPD_PORT_A;
 + else
 + intel_encoder->hpd_pin = HPD_PORT_B;
   break;
   case PORT_C:
   if (IS_BROXTON(dev_priv))

As I pointed out earlier, with the above approach you need to add
support for HPD events on the HPD_PORT_A pin. If you look at the
for_each_hpd_pin() macro and intel_hpd_irq_handler()/is_dig_port you'll
notice that any interrupt event on the HPD_PORT_A pin will be ignored
now.

--Imre

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 05/13 v4] drm/i915: Debugfs interface to read GuC load status

2015-07-17 Thread O'Rourke, Tom
On Thu, Jul 09, 2015 at 07:29:06PM +0100, Dave Gordon wrote:
 From: Alex Dai yu@intel.com
 
 The new node provides access to the status of the GuC-specific loader,
 and also to the scratch registers used for communication between the i915
 driver and the GuC firmware.
 
 v2:
 Changes to output formats per Chris Wilson's suggestions
 
 v4:
 Rebased
 
 Issue: VIZ-4884
 Signed-off-by: Alex Dai yu@intel.com
 Signed-off-by: Dave Gordon david.s.gor...@intel.com
 ---
Reviewed-by: Tom O'Rourke Tom.O'rou...@intel.com

  drivers/gpu/drm/i915/i915_debugfs.c | 39 
 +
  1 file changed, 39 insertions(+)
 
 diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
 b/drivers/gpu/drm/i915/i915_debugfs.c
 index 98fd3c9..9ff5f17 100644
 --- a/drivers/gpu/drm/i915/i915_debugfs.c
 +++ b/drivers/gpu/drm/i915/i915_debugfs.c
 @@ -2359,6 +2359,44 @@ static int i915_llc(struct seq_file *m, void *data)
   return 0;
  }
  
 +static int i915_guc_load_status_info(struct seq_file *m, void *data)
 +{
 + struct drm_info_node *node = m->private;
 + struct drm_i915_private *dev_priv = node->minor->dev->dev_private;
 + struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
 + u32 tmp, i;
 +
 + if (!HAS_GUC_UCODE(dev_priv->dev))
 + return 0;
 +
 + seq_printf(m, "GuC firmware status:\n");
 + seq_printf(m, "\tpath: %s\n",
 + guc_fw->guc_fw_path);
 + seq_printf(m, "\tfetch: %s\n",
 + intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status));
 + seq_printf(m, "\tload: %s\n",
 + intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
 + seq_printf(m, "\tversion wanted: %d.%d\n",
 + guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted);
 + seq_printf(m, "\tversion found: %d.%d\n",
 + guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found);
 +
 + tmp = I915_READ(GUC_STATUS);
 +
 + seq_printf(m, "\nGuC status 0x%08x:\n", tmp);
 + seq_printf(m, "\tBootrom status = 0x%x\n",
 + (tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
 + seq_printf(m, "\tuKernel status = 0x%x\n",
 + (tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
 + seq_printf(m, "\tMIA Core status = 0x%x\n",
 + (tmp & GS_MIA_MASK) >> GS_MIA_SHIFT);
 + seq_puts(m, "\nScratch registers:\n");
 + for (i = 0; i < 16; i++)
 + seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i)));
 +
 + return 0;
 +}
 +
  static int i915_edp_psr_status(struct seq_file *m, void *data)
  {
   struct drm_info_node *node = m-private;
 @@ -5073,6 +5111,7 @@ static const struct drm_info_list i915_debugfs_list[] = 
 {
   {"i915_gem_hws_bsd", i915_hws_info, 0, (void *)VCS},
   {"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS},
   {"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
 + {"i915_guc_load_status", i915_guc_load_status_info, 0},
   {"i915_frequency_info", i915_frequency_info, 0},
   {"i915_hangcheck_info", i915_hangcheck_info, 0},
   {"i915_drpc_info", i915_drpc_info, 0},
 -- 
 1.9.1
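
Usage note: once merged, the node reads like any other i915 debugfs file,
e.g. "cat /sys/kernel/debug/dri/0/i915_guc_load_status" (assuming debugfs
is mounted at the usual /sys/kernel/debug and the GPU is DRM minor 0).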
 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx