Re: [Intel-gfx] [PATCH 42/43] drm/i915/bdw: Pin the context backing objects to GGTT on-demand

2014-08-15 Thread Daniel Vetter
On Thu, Jul 24, 2014 at 05:04:50PM +0100, Thomas Daniel wrote:
 From: Oscar Mateo oscar.ma...@intel.com
 
 Up until now, we have pinned every logical ring context backing object
 during creation, and left it pinned until destruction. This made my life
 easier, but it's a harmful thing to do, because we cause fragmentation
 of the GGTT (and, eventually, we would run out of space).
 
 This patch makes the pinning on-demand: the backing objects of the two
 contexts that are written to the ELSP are pinned right before submission
 and unpinned once the hardware is done with them. The only context that
 is still pinned regardless is the global default one, so that the HWS can
 still be accessed in the same way (ring->status_page).
 
 v2: In the early version of this patch, we were pinning the context as
 we put it into the ELSP: on the one hand, this is very efficient because
 only a maximum two contexts are pinned at any given time, but on the other
 hand, we cannot really pin in interrupt time :(
 
 Signed-off-by: Oscar Mateo oscar.ma...@intel.com
 ---
  drivers/gpu/drm/i915/i915_debugfs.c |   11 +++--
  drivers/gpu/drm/i915/i915_drv.h |1 +
  drivers/gpu/drm/i915/i915_gem.c |   44 
 ---
  drivers/gpu/drm/i915/intel_lrc.c|   42 -
  drivers/gpu/drm/i915/intel_lrc.h|2 ++
  5 files changed, 73 insertions(+), 27 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
 b/drivers/gpu/drm/i915/i915_debugfs.c
 index 968c3c0..84531cc 100644
 --- a/drivers/gpu/drm/i915/i915_debugfs.c
 +++ b/drivers/gpu/drm/i915/i915_debugfs.c
 @@ -1721,10 +1721,15 @@ static int i915_dump_lrc(struct seq_file *m, void 
 *unused)
   continue;
  
   if (ctx_obj) {
 - struct page *page = 
 i915_gem_object_get_page(ctx_obj, 1);
 - uint32_t *reg_state = kmap_atomic(page);
 + struct page *page ;
 + uint32_t *reg_state;
   int j;
  
 + i915_gem_obj_ggtt_pin(ctx_obj, 
 GEN8_LR_CONTEXT_ALIGN, 0);

This just needs a get/put_pages, no pinning required.

 +
 + page = i915_gem_object_get_page(ctx_obj, 1);
 + reg_state = kmap_atomic(page);
 +
  seq_printf(m, "CONTEXT: %s %u\n", ring->name,
   
 intel_execlists_ctx_id(ctx_obj));
  
 @@ -1736,6 +1741,8 @@ static int i915_dump_lrc(struct seq_file *m, void 
 *unused)
   }
   kunmap_atomic(reg_state);
  
 + i915_gem_object_ggtt_unpin(ctx_obj);
 +
   seq_putc(m, '\n');
   }
   }
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index 1ce51d6..70466af 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -628,6 +628,7 @@ struct intel_context {
   struct {
   struct drm_i915_gem_object *state;
   struct intel_ringbuffer *ringbuf;
 + atomic_t unpin_count;

No need to reinvent wheels for this. We should be able to do exactly what
we've done for the legacy ctx objects, namely:
- Always pin the default system context so that we can always switch to
  that.
- Shovel all context related objects through the active queue and obj
  management. This might depend upon the reworked exec list item.
- In the shrinker have a last-ditch effort to switch to the default
  context in case we run out of space.
- igt testcase. For legacy rings this code here resulted in some _really_
  expensive bugs and regressions.

I'll do another jira for this.

Otherwise I think I've pulled pretty much everything in except those
places where I think directly reworking the patches makes more sense. And
we should have JIRA tasks for all the outstanding work (plus ofc getting
ppgtt ready since execlists requires that).

If there's a patch I've missed or where people think it makes more sense
to merge the wip version now, please pipe up.

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 42/43] drm/i915/bdw: Pin the context backing objects to GGTT on-demand

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Up until now, we have pinned every logical ring context backing object
during creation, and left it pinned until destruction. This made my life
easier, but it's a harmful thing to do, because we cause fragmentation
of the GGTT (and, eventually, we would run out of space).

This patch makes the pinning on-demand: the backing objects of the two
contexts that are written to the ELSP are pinned right before submission
and unpinned once the hardware is done with them. The only context that
is still pinned regardless is the global default one, so that the HWS can
still be accessed in the same way (ring->status_page).

v2: In the early version of this patch, we were pinning the context as
we put it into the ELSP: on the one hand, this is very efficient because
only a maximum two contexts are pinned at any given time, but on the other
hand, we cannot really pin in interrupt time :(

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c |   11 +++--
 drivers/gpu/drm/i915/i915_drv.h |1 +
 drivers/gpu/drm/i915/i915_gem.c |   44 ---
 drivers/gpu/drm/i915/intel_lrc.c|   42 -
 drivers/gpu/drm/i915/intel_lrc.h|2 ++
 5 files changed, 73 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 968c3c0..84531cc 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1721,10 +1721,15 @@ static int i915_dump_lrc(struct seq_file *m, void 
*unused)
continue;
 
if (ctx_obj) {
-   struct page *page = 
i915_gem_object_get_page(ctx_obj, 1);
-   uint32_t *reg_state = kmap_atomic(page);
+   struct page *page;
+   uint32_t *reg_state;
int j;
 
+   i915_gem_obj_ggtt_pin(ctx_obj, 
GEN8_LR_CONTEXT_ALIGN, 0);
+
+   page = i915_gem_object_get_page(ctx_obj, 1);
+   reg_state = kmap_atomic(page);
+
seq_printf(m, "CONTEXT: %s %u\n", ring->name,

intel_execlists_ctx_id(ctx_obj));
 
@@ -1736,6 +1741,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
}
kunmap_atomic(reg_state);
 
+   i915_gem_object_ggtt_unpin(ctx_obj);
+
seq_putc(m, '\n');
}
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1ce51d6..70466af 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -628,6 +628,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   atomic_t unpin_count;
} engine[I915_NUM_RINGS];
 
struct list_head link;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 143cff7..42faaa3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2491,12 +2491,23 @@ static void i915_set_reset_status(struct 
drm_i915_private *dev_priv,
 
 static void i915_gem_free_request(struct drm_i915_gem_request *request)
 {
+   struct intel_context *ctx = request->ctx;
+
list_del(&request->list);
i915_gem_request_remove_from_client(request);
 
-   if (request->ctx)
-   i915_gem_context_unreference(request->ctx);
+   if (ctx) {
+   struct intel_engine_cs *ring = request->ring;
+   struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+   atomic_t *unpin_count = &ctx->engine[ring->id].unpin_count;
 
+   if (ctx_obj) {
+   if (atomic_dec_return(unpin_count) == 0 &&
+   ctx != ring->default_context)
+   i915_gem_object_ggtt_unpin(ctx_obj);
+   }
+   i915_gem_context_unreference(ctx);
+   }
kfree(request);
 }
 
@@ -2551,6 +2562,23 @@ static void i915_gem_reset_ring_cleanup(struct 
drm_i915_private *dev_priv,
}
 
/*
+* Clear the execlists queue up before freeing the requests, as those
+* are the ones that keep the context and ringbuffer backing objects
+* pinned in place.
+*/
+   while (!list_empty(&ring->execlist_queue)) {
+   struct intel_ctx_submit_request *submit_req;
+
submit_req = list_first_entry(&ring->execlist_queue,
+   struct intel_ctx_submit_request,
+   execlist_link);
+