Re: [Intel-gfx] [RFC 19/39] drm/i915: Added scheduler support to __wait_request() calls

2015-07-21 Thread Daniel Vetter
On Fri, Jul 17, 2015 at 03:33:28PM +0100, john.c.harri...@intel.com wrote:
 From: John Harrison john.c.harri...@intel.com
 
 The scheduler can cause batch buffers, and hence requests, to be submitted to
 the ring out of order and asynchronously to their submission to the driver. 
 Thus
 at the point of waiting for the completion of a given request, it is not even
 guaranteed that the request has actually been sent to the hardware yet. Even
 if it has been sent, it is possible that it could be pre-empted and thus
 'unsent'.
 
 This means that it is necessary to be able to submit requests to the hardware
 during the wait call itself. Unfortunately, while some callers of
 __wait_request() release the mutex lock first, others do not (and apparently 
 can
 not). Hence there is the ability to deadlock as the wait stalls for submission
 but the asynchronous submission is stalled for the mutex lock.
 
 This change hooks the scheduler in to the __wait_request() code to ensure
 correct behaviour. That is, flush the target batch buffer through to the
 hardware and do not deadlock waiting for something that cannot currently be
 submitted. Instead, the wait call must return EAGAIN at least as far back as
 necessary to release the mutex lock and allow the scheduler's asynchronous
 processing to get in and handle the pre-emption operation and eventually
 (re-)submit the work.

I expect this to break the shrinker logic and that means fixing up all the
callers that bind objects into ggtt, like you already do (well comment
about) for the page fault handler. And just doing -EAGAIN spinning isn't
pretty either.

Instead we need a proper locking scheme here so that the scheduler can run
while the struct mutex is held. execlist submission had the same
requirement and hence has grown its own locks.

Note that there's currently one type of memory that we can evict which the
shrinker doesn't take care of, and that's to-be-unpinned buffers after
pageflips. We're working to fix that.
-Daniel

 
 Change-Id: I31fe6bc7e38f6ffdd843fcae16e7cc8b1e52a931
 For: VIZ-1587
 Signed-off-by: John Harrison john.c.harri...@intel.com
 ---
  drivers/gpu/drm/i915/i915_drv.h |  3 +-
  drivers/gpu/drm/i915/i915_gem.c | 37 +++---
  drivers/gpu/drm/i915/i915_scheduler.c   | 91 
 +
  drivers/gpu/drm/i915/i915_scheduler.h   |  2 +
  drivers/gpu/drm/i915/intel_display.c|  3 +-
  drivers/gpu/drm/i915/intel_ringbuffer.c |  2 +-
  6 files changed, 128 insertions(+), 10 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index 2b3fab6..e9e0736 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -2972,7 +2972,8 @@ int __i915_wait_request(struct drm_i915_gem_request 
 *req,
   unsigned reset_counter,
   bool interruptible,
   s64 *timeout,
 - struct intel_rps_client *rps);
 + struct intel_rps_client *rps,
 + bool is_locked);
  int __must_check i915_wait_request(struct drm_i915_gem_request *req);
  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
  int __must_check
 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
 index cb5af5d..f713cda 100644
 --- a/drivers/gpu/drm/i915/i915_gem.c
 +++ b/drivers/gpu/drm/i915/i915_gem.c
 @@ -1219,7 +1219,8 @@ int __i915_wait_request(struct drm_i915_gem_request 
 *req,
   unsigned reset_counter,
   bool interruptible,
   s64 *timeout,
 - struct intel_rps_client *rps)
 + struct intel_rps_client *rps,
 + bool is_locked)
  {
   struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
 	struct drm_device *dev = ring->dev;
 @@ -1229,8 +1230,10 @@ int __i915_wait_request(struct drm_i915_gem_request 
 *req,
   DEFINE_WAIT(wait);
   unsigned long timeout_expire;
   s64 before, now;
 - int ret;
 + int ret = 0;
 + 	bool	busy;
  
 + might_sleep();
 	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
  
 	if (list_empty(req->list))
 @@ -1281,6 +1284,22 @@ int __i915_wait_request(struct drm_i915_gem_request 
 *req,
   break;
   }
  
 + if (is_locked) {
 + /* If this request is being processed by the scheduler
 +  * then it is unsafe to sleep with the mutex lock held
 +  * as the scheduler may require the lock in order to
 +  * progress the request. */
 + 		if (i915_scheduler_is_request_tracked(req, NULL, &busy)) {
 + if (busy) {
 + ret = -EAGAIN;
 + break;
 + }
 + }
 +
 +

[Intel-gfx] [RFC 19/39] drm/i915: Added scheduler support to __wait_request() calls

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The scheduler can cause batch buffers, and hence requests, to be submitted to
the ring out of order and asynchronously to their submission to the driver. Thus
at the point of waiting for the completion of a given request, it is not even
guaranteed that the request has actually been sent to the hardware yet. Even if
it has been sent, it is possible that it could be pre-empted and thus 'unsent'.

This means that it is necessary to be able to submit requests to the hardware
during the wait call itself. Unfortunately, while some callers of
__wait_request() release the mutex lock first, others do not (and apparently can
not). Hence there is the ability to deadlock as the wait stalls for submission
but the asynchronous submission is stalled for the mutex lock.

This change hooks the scheduler in to the __wait_request() code to ensure
correct behaviour. That is, flush the target batch buffer through to the
hardware and do not deadlock waiting for something that cannot currently be
submitted. Instead, the wait call must return EAGAIN at least as far back as
necessary to release the mutex lock and allow the scheduler's asynchronous
processing to get in and handle the pre-emption operation and eventually
(re-)submit the work.

Change-Id: I31fe6bc7e38f6ffdd843fcae16e7cc8b1e52a931
For: VIZ-1587
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h |  3 +-
 drivers/gpu/drm/i915/i915_gem.c | 37 +++---
 drivers/gpu/drm/i915/i915_scheduler.c   | 91 +
 drivers/gpu/drm/i915/i915_scheduler.h   |  2 +
 drivers/gpu/drm/i915/intel_display.c|  3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  2 +-
 6 files changed, 128 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2b3fab6..e9e0736 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2972,7 +2972,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
unsigned reset_counter,
bool interruptible,
s64 *timeout,
-   struct intel_rps_client *rps);
+   struct intel_rps_client *rps,
+   bool is_locked);
 int __must_check i915_wait_request(struct drm_i915_gem_request *req);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cb5af5d..f713cda 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1219,7 +1219,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
unsigned reset_counter,
bool interruptible,
s64 *timeout,
-   struct intel_rps_client *rps)
+   struct intel_rps_client *rps,
+   bool is_locked)
 {
struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
	struct drm_device *dev = ring->dev;
@@ -1229,8 +1230,10 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
DEFINE_WAIT(wait);
unsigned long timeout_expire;
s64 before, now;
-   int ret;
+   int ret = 0;
+	bool	busy;
 
+   might_sleep();
	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
 
	if (list_empty(req->list))
@@ -1281,6 +1284,22 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
break;
}
 
+   if (is_locked) {
+   /* If this request is being processed by the scheduler
+* then it is unsafe to sleep with the mutex lock held
+* as the scheduler may require the lock in order to
+* progress the request. */
+		if (i915_scheduler_is_request_tracked(req, NULL, &busy)) {
+   if (busy) {
+   ret = -EAGAIN;
+   break;
+   }
+   }
+
+		/* If the request is not tracked by the scheduler then the
+* regular test can be done. */
+   }
+
if (i915_gem_request_completed(req)) {
ret = 0;
break;
@@ -1452,13 +1471,17 @@ i915_wait_request(struct drm_i915_gem_request *req)
 
	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 
+   ret = i915_scheduler_flush_request(req, true);
	if (ret < 0)
+   return ret;
+
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
if (ret)
return ret;
 
ret = __i915_wait_request(req,