[Intel-gfx] [PATCH] drm/i915: Fix maxfifo watermark calc on vlv cursor planes

2015-10-23 Thread Thomas Daniel
A typo resulted in the watermarks for cursor planes not being calculated
correctly.  Fixed the typo.

Cc: Ville Syrjälä <ville.syrj...@linux.intel.com>
Signed-off-by: Thomas Daniel <thomas.dan...@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 0fb0459..c01dd2b 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -1135,7 +1135,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc)
case DRM_PLANE_TYPE_CURSOR:
for (level = 0; level < wm_state->num_levels; level++)
wm_state->sr[level].cursor =
-   wm_state->sr[level].cursor;
+   wm_state->wm[level].cursor;
break;
case DRM_PLANE_TYPE_PRIMARY:
for (level = 0; level < wm_state->num_levels; level++)
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6] drm/i915: Add soft-pinning API for execbuffer

2015-10-16 Thread Thomas Daniel
From: Chris Wilson <ch...@chris-wilson.co.uk>

Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>

v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
(Not published externally)

v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
to allow eviction of soft-pinned objects when another soft-pinned object used
by a subsequent execbuffer overlaps reported by Michal Winiarski.
(Not published externally)

v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
pinned first after an address conflict happens to avoid repeated conflicts in
rare cases (Suggested by Chris Wilson).  Expanded comment on
drm_i915_gem_exec_object2.offset to cover this new API.

v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
(Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
buffers are not considered misplaced without the user specifying
EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility for any
addressing workarounds.  Updated object2.offset field comment again to clarify
NO_RELOC case (Chris).  checkpatch cleanup.

v6: Trivial rebase on latest drm-intel-nightly

Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Akash Goel <akash.g...@intel.com>
Cc: Vinay Belgaumkar <vinay.belgaum...@intel.com>
Cc: Michal Winiarski <michal.winiar...@intel.com>
Cc: Zou Nanhai <nanhai@intel.com>
Cc: Kristian Høgsberg <hoegsb...@gmail.com>
Signed-off-by: Thomas Daniel <thomas.dan...@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c|  3 ++
 drivers/gpu/drm/i915/i915_drv.h|  2 +
 drivers/gpu/drm/i915/i915_gem.c| 64 --
 drivers/gpu/drm/i915/i915_gem_evict.c  | 39 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 +++--
 include/uapi/drm/i915_drm.h| 12 --
 6 files changed, 113 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 2336af9..824c6c3 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_RESOURCE_STREAMER:
value = HAS_RESOURCE_STREAMER(dev);
break;
+   case I915_PARAM_HAS_EXEC_SOFTPIN:
+   value = 1;
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1396af9..73c3acf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2816,6 +2816,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
 #define PIN_UPDATE (1<<5)
 #define PIN_ZONE_4G(1<<6)
 #define PIN_HIGH   (1<<7)
+#define PIN_OFFSET_FIXED   (1<<8)
 #define PIN_OFFSET_MASK (~4095)
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
@@ -3163,6 +3164,7 @@ int __must_check i915_gem_evict_something(struct 
drm_device *dev,
  unsigned long start,
  unsigned long end,
  unsigned flags);
+int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 
 /* belongs in i915_gem_gtt.h */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1e67484..c3453bd 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3450,30 +3450,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
if (IS_ERR(vma))
goto err_unpin;
 
-   if (flags & PIN_HIGH) {
-   search_flag = DRM_MM_SEARCH_BELOW;
-   alloc_flag = DRM_MM_CREATE_TOP;
+   if (flags & PIN_OFFSET_FIXED) {
+   uint64_t offset = flags & PIN_OFFSET_MASK;
+
+   if (offset & (alignment - 1)) {
+   ret = -EINVAL;
+   goto err_free_vma;
+   }
+   vma->node.start = offset;
+   vma->node.size = size;
+   vma->node.color = obj->cache_level;
+   ret = drm_mm_reserve_node(>mm, >node);
+   if (ret) {
+   ret = i915_gem_e

[Intel-gfx] [PATCH v5] drm/i915: Add soft-pinning API for execbuffer

2015-07-20 Thread Thomas Daniel
From: Chris Wilson ch...@chris-wilson.co.uk

Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.

Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk

v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
(Not published externally)

v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
to allow eviction of soft-pinned objects when another soft-pinned object used
by a subsequent execbuffer overlaps reported by Michal Winiarski.
(Not published externally)

v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
pinned first after an address conflict happens to avoid repeated conflicts in
rare cases (Suggested by Chris Wilson).  Expanded comment on
drm_i915_gem_exec_object2.offset to cover this new API.

v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
(Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
buffers are not considered misplaced without the user specifying
EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility for any
addressing workarounds.  Updated object2.offset field comment again to clarify
NO_RELOC case (Chris).  checkpatch cleanup.

Cc: Chris Wilson ch...@chris-wilson.co.uk
Cc: Akash Goel akash.g...@intel.com
Cc: Vinay Belgaumkar vinay.belgaum...@intel.com
Cc: Michal Winiarski michal.winiar...@intel.com
Cc: Zou Nanhai nanhai@intel.com
Cc: Kristian Høgsberg hoegsb...@gmail.com
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_dma.c|3 ++
 drivers/gpu/drm/i915/i915_drv.h|4 +++
 drivers/gpu/drm/i915/i915_gem.c|   52 
 drivers/gpu/drm/i915/i915_gem_evict.c  |   39 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   19 --
 include/uapi/drm/i915_drm.h|   12 +--
 6 files changed, 109 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index c5349fa..9805546 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = i915.enable_hangcheck 
intel_has_gpu_reset(dev);
break;
+   case I915_PARAM_HAS_EXEC_SOFTPIN:
+   value = 1;
+   break;
default:
DRM_DEBUG(Unknown parameter %d\n, param-param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d90a782..e96c101 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
 #define PIN_USER   (14)
 #define PIN_UPDATE (15)
 #define PIN_FULL_RANGE (16)
+#define PIN_OFFSET_FIXED   (17)
 #define PIN_OFFSET_MASK (~4095)
+
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct 
drm_device *dev,
  unsigned long start,
  unsigned long end,
  unsigned flags);
+int __must_check
+i915_gem_evict_for_vma(struct i915_vma *target);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 int i915_gem_evict_everything(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f170da6..fea4197 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3806,22 +3806,42 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
if (IS_ERR(vma))
goto err_unpin;
 
+   if (flags  PIN_OFFSET_FIXED) {
+   uint64_t offset = flags  PIN_OFFSET_MASK;
+
+   if (offset  (alignment - 1)) {
+   ret = -EINVAL;
+   goto err_free_vma;
+   }
+   vma-node.start = offset;
+   vma-node.size = size;
+   vma-node.color = obj-cache_level;
+   ret = drm_mm_reserve_node(vm-mm, vma-node);
+   if (ret) {
+   ret = i915_gem_evict_for_vma(vma);
+   if (ret == 0)
+   ret = drm_mm_reserve_node(vm-mm, vma-node);
+   }
+   if (ret)
+   goto err_free_vma;
+   } else {
 search_free:
-   ret

[Intel-gfx] [PATCH v4] drm/i915: Add soft-pinning API for execbuffer

2015-06-30 Thread Thomas Daniel
From: Chris Wilson ch...@chris-wilson.co.uk

Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.

Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk

v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
(Not published externally)

v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
to allow eviction of soft-pinned objects when another soft-pinned object used
by a subsequent execbuffer overlaps reported by Michal Winiarski.
(Not published externally)

v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
pinned first after an address conflict happens to avoid repeated conflicts in
rare cases (Suggested by Chris Wilson).  Expanded comment on
drm_i915_gem_exec_object2.offset to cover this new API.

Cc: Chris Wilson ch...@chris-wilson.co.uk
Cc: Akash Goel akash.g...@intel.com
Cc: Vinay Belgaumkar vinay.belgaum...@intel.com
Cc: Michal Winiarski michal.winiar...@intel.com
Cc: Zou Nanhai nanhai@intel.com
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h|4 +++
 drivers/gpu/drm/i915/i915_gem.c|   51 
 drivers/gpu/drm/i915/i915_gem_evict.c  |   38 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   16 +++--
 include/uapi/drm/i915_drm.h|9 +++--
 5 files changed, 99 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d90a782..e96c101 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
 #define PIN_USER   (14)
 #define PIN_UPDATE (15)
 #define PIN_FULL_RANGE (16)
+#define PIN_OFFSET_FIXED   (17)
 #define PIN_OFFSET_MASK (~4095)
+
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct 
drm_device *dev,
  unsigned long start,
  unsigned long end,
  unsigned flags);
+int __must_check
+i915_gem_evict_for_vma(struct i915_vma *target);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 int i915_gem_evict_everything(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f170da6..a7e5ff2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3806,22 +3806,41 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
if (IS_ERR(vma))
goto err_unpin;
 
+   if (flags  PIN_OFFSET_FIXED) {
+   uint64_t offset = flags  PIN_OFFSET_MASK;
+   if (offset  (alignment - 1)) {
+   ret = -EINVAL;
+   goto err_free_vma;
+   }
+   vma-node.start = offset;
+   vma-node.size = size;
+   vma-node.color = obj-cache_level;
+   ret = drm_mm_reserve_node(vm-mm, vma-node);
+   if (ret) {
+   ret = i915_gem_evict_for_vma(vma);
+   if (ret == 0)
+   ret = drm_mm_reserve_node(vm-mm, vma-node);
+   }
+   if (ret)
+   goto err_free_vma;
+   } else {
 search_free:
-   ret = drm_mm_insert_node_in_range_generic(vm-mm, vma-node,
- size, alignment,
- obj-cache_level,
- start, end,
- search_flag,
- alloc_flag);
-   if (ret) {
-   ret = i915_gem_evict_something(dev, vm, size, alignment,
-  obj-cache_level,
-  start, end,
-  flags);
-   if (ret == 0)
-   goto search_free;
+   ret = drm_mm_insert_node_in_range_generic(vm-mm, vma-node,
+ size, alignment,
+ obj-cache_level,
+ start, end,
+ search_flag

[Intel-gfx] [PATCH] drm/i915: Shift driver's HWSP usage out of reserved range

2015-02-18 Thread Thomas Daniel
As of Gen6, the general purpose area of the hardware status page has shrunk and
now begins at dword 0x30.  i915 driver uses dword 0x20 to store the seqno which
is now reserved.  So shift our HWSP dwords up into the general purpose range
before this bites us.

Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/intel_ringbuffer.h |7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b6c484f..39183fc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -373,11 +373,12 @@ intel_write_status_page(struct intel_engine_cs *ring,
  * 0x06: ring 2 head pointer (915-class)
  * 0x10-0x1b: Context status DWords (GM45)
  * 0x1f: Last written status offset. (GM45)
+ * 0x20-0x2f: Reserved (Gen6+)
  *
- * The area from dword 0x20 to 0x3ff is available for driver usage.
+ * The area from dword 0x30 to 0x3ff is available for driver usage.
  */
-#define I915_GEM_HWS_INDEX 0x20
-#define I915_GEM_HWS_SCRATCH_INDEX 0x30
+#define I915_GEM_HWS_INDEX 0x30
+#define I915_GEM_HWS_SCRATCH_INDEX 0x40
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX  
MI_STORE_DWORD_INDEX_SHIFT)
 
 void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Reset logical ring contexts' head and tail during GPU reset

2015-02-16 Thread Thomas Daniel
Work was getting left behind in LRC contexts during reset.  This causes a hang
if the GPU is reset when HEAD==TAIL because the context's ringbuffer head and
tail don't get reset and retiring a request doesn't alter them, so the ring
still appears full.

Added a function intel_lr_context_reset() to reset head and tail on a LRC and
its ringbuffer.

Call intel_lr_context_reset() for each context in i915_gem_context_reset() when
in execlists mode.

Testcase: igt/pm_rps --run-subtest reset #bdw
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88096
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_context.c |   12 +++
 drivers/gpu/drm/i915/intel_lrc.c|   34 +++
 drivers/gpu/drm/i915/intel_lrc.h|2 ++
 3 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 8603bf4..70346b0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -296,11 +296,15 @@ void i915_gem_context_reset(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev-dev_private;
int i;
 
-   /* In execlists mode we will unreference the context when the execlist
-* queue is cleared and the requests destroyed.
-*/
-   if (i915.enable_execlists)
+   if (i915.enable_execlists) {
+   struct intel_context *ctx;
+
+   list_for_each_entry(ctx, dev_priv-context_list, link) {
+   intel_lr_context_reset(dev, ctx);
+   }
+
return;
+   }
 
for (i = 0; i  I915_NUM_RINGS; i++) {
struct intel_engine_cs *ring = dev_priv-ring[i];
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index aafcef3..1946bb9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1950,3 +1950,37 @@ error_unpin_ctx:
drm_gem_object_unreference(ctx_obj-base);
return ret;
 }
+
+void intel_lr_context_reset(struct drm_device *dev,
+   struct intel_context *ctx)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_engine_cs *ring;
+   int i;
+
+   for_each_ring(ring, dev_priv, i) {
+   struct drm_i915_gem_object *ctx_obj =
+   ctx-engine[ring-id].state;
+   if (ctx_obj) {
+   struct intel_ringbuffer *ringbuf =
+   ctx-engine[ring-id].ringbuf;
+   uint32_t *reg_state;
+   struct page *page;
+
+   if (i915_gem_object_get_pages(ctx_obj)) {
+   WARN(1, Failed get_pages for context obj\n);
+   continue;
+   }
+   page = i915_gem_object_get_page(ctx_obj, 1);
+   reg_state = kmap_atomic(page);
+
+   reg_state[CTX_RING_HEAD+1] = 0;
+   reg_state[CTX_RING_TAIL+1] = 0;
+
+   kunmap_atomic(reg_state);
+
+   ringbuf-head = 0;
+   ringbuf-tail = 0;
+   }
+   }
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f635735..5dd0eca 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -73,6 +73,8 @@ int intel_lr_context_deferred_create(struct intel_context 
*ctx,
 struct intel_engine_cs *ring);
 void intel_lr_context_unpin(struct intel_engine_cs *ring,
struct intel_context *ctx);
+void intel_lr_context_reset(struct drm_device *dev,
+   struct intel_context *ctx);
 
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_device *dev, int 
enable_execlists);
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH i-g-t] tests/prime_self_import: further fix object counts

2015-01-19 Thread Thomas Daniel
A previous commit:

commit 2f2c491cf3167befe7c79e4b17afb4f6284dfc84
Author: Mika Kuoppala mika.kuopp...@intel.com
Date:   Fri Mar 28 10:52:46 2014 +0200

lib/drmtest: don't dup quiescent fd

introduced a regression for drm object leak checking.  A following commit:

commit 8741c2289f17e9bcb740a01cad4764a71c918eea
Author: Mika Kuoppala mika.kuopp...@intel.com
Date:   Wed May 7 16:46:19 2014 +0300

tests/gem_flink_race,prime_self_import: fix object counts

fixed the regression for some cases but missed the export-vs-gem_close-race
subtest.  Note that test behaviour depends on whether the subtest is run as
part of the whole prime_self_import test (fail), or as a single subtest (pass).
Using execlists on Gen8+ reverses the pass/fail due to lazy context allocation
which is presumably why this has now been noticed.

This commit applies the same fix to the export-vs-gem_close-race subtest.

Cc: Mika Kuoppala mika.kuopp...@intel.com
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=87627
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 tests/prime_self_import.c |   10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/prime_self_import.c b/tests/prime_self_import.c
index 1eb5a04..ded92cf 100644
--- a/tests/prime_self_import.c
+++ b/tests/prime_self_import.c
@@ -342,15 +342,20 @@ static void test_export_close_race(void)
int fd;
int obj_count;
void *status;
+   int fake;
 
num_threads = sysconf(_SC_NPROCESSORS_ONLN);
 
threads = calloc(num_threads, sizeof(pthread_t));
 
-   fd = drm_open_any();
+   /* Allocate exit handler fds in here so that we dont screw
+* up the counts */
+   fake = drm_open_any();
 
obj_count = get_object_count();
 
+   fd = drm_open_any();
+
for (i = 0; i  num_threads; i++) {
r = pthread_create(threads[i], NULL,
   thread_fn_export_vs_close,
@@ -372,6 +377,9 @@ static void test_export_close_race(void)
obj_count = get_object_count() - obj_count;
 
igt_info(leaked %i objects\n, obj_count);
+
+   close(fake);
+
igt_assert_eq(obj_count, 0);
 }
 
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2] drm/i915: Reset CSB read pointer in ring init

2015-01-09 Thread Thomas Daniel
A previous commit enabled execlists by default:

   commit 27401d126b5b (drm/i915/bdw: Enable execlists by default where 
supported)

This allowed routine testing of execlists which exposed a regression when
resuming from suspend.  The cause was tracked down the to recent changes to the
ring init sequence:

   commit 35a57ffbb108 (drm/i915: Only init engines once)

During a suspend/resume cycle the hardware Context Status Buffer write pointer
is reset.  However since the recent changes to the init sequence the software 
CSB
read pointer is no longer reset.  This means that context status events are not
handled correctly and new contexts are not written to the ELSP, resulting in an
apparent GPU hang.

Pending further changes to the ring init code, just move the
ring-next_context_status_buffer initialization into gen8_init_common_ring to
fix this regression.

v2: Moved init into gen8_init_common_ring rather than context_enable after
feedback from Daniel Vetter.  Updated commit msg to reflect this and also cite
commits related to the regression.  Fixed bz link to correct bug.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88096
Cc: Paulo Zanoni paulo.r.zan...@intel.com
Cc: Daniel Vetter daniel.vet...@ffwll.ch
Cc: Dave Gordon david.s.gor...@intel.com
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7670a0f..e405b61 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1137,6 +1137,7 @@ static int gen8_init_common_ring(struct intel_engine_cs 
*ring)
   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
POSTING_READ(RING_MODE_GEN7(ring));
+   ring-next_context_status_buffer = 0;
DRM_DEBUG_DRIVER(Execlists enabled for %s\n, ring-name);
 
memset(ring-hangcheck, 0, sizeof(ring-hangcheck));
@@ -1394,7 +1395,6 @@ static int logical_ring_init(struct drm_device *dev, 
struct intel_engine_cs *rin
INIT_LIST_HEAD(ring-execlist_queue);
INIT_LIST_HEAD(ring-execlist_retired_req_list);
spin_lock_init(ring-execlist_lock);
-   ring-next_context_status_buffer = 0;
 
ret = i915_cmd_parser_init_ring(ring);
if (ret)
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Reset CSB read pointer when enabling contexts

2015-01-07 Thread Thomas Daniel
During a suspend/resume cycle the hardware Context Status Buffer write pointer
is reset.  However since recent changes to the init sequence the software CSB
read pointer is no longer reset.  This means that context status events are not
handled correctly and new contexts are not written to the ELSP, resulting in an
apparent GPU hang.

Pending further changes to the ring init code, just move the
ring-next_context_status_buffer initialization into i915_gem_context_enable to
fix this regression.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88097
Cc: Paulo Zanoni paulo.r.zan...@intel.com
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_context.c |1 +
 drivers/gpu/drm/i915/intel_lrc.c|1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index bf9778e..cc100c9 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -412,6 +412,7 @@ int i915_gem_context_enable(struct drm_i915_private 
*dev_priv)
 
if (i915.enable_execlists) {
for_each_ring(ring, dev_priv, i) {
+   ring-next_context_status_buffer = 0;
if (ring-init_context) {
ret = ring-init_context(ring,
ring-default_context);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7670a0f..4580267 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1394,7 +1394,6 @@ static int logical_ring_init(struct drm_device *dev, 
struct intel_engine_cs *rin
INIT_LIST_HEAD(ring-execlist_queue);
INIT_LIST_HEAD(ring-execlist_retired_req_list);
spin_lock_init(ring-execlist_lock);
-   ring-next_context_status_buffer = 0;
 
ret = i915_cmd_parser_init_ring(ring);
if (ret)
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/bdw: Enable execlists by default where supported

2014-12-11 Thread Thomas Daniel
Execlist support in the i915 driver is now considered good enough for the
feature to be enabled by default on Gen8 and later and routinely tested.
Adjusted i915 parameters structure initialization to reflect this and updated
the comment in intel_sanitize_enable_execlists().

Issue: VIZ-2020
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_params.c |2 +-
 drivers/gpu/drm/i915/intel_lrc.c   |3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_params.c 
b/drivers/gpu/drm/i915/i915_params.c
index c91cb20..ad685d8 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -35,7 +35,7 @@ struct i915_params i915 __read_mostly = {
.vbt_sdvo_panel_type = -1,
.enable_rc6 = -1,
.enable_fbc = -1,
-   .enable_execlists = 0,
+   .enable_execlists = -1,
.enable_hangcheck = true,
.enable_ppgtt = -1,
.enable_psr = 0,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 89b5577..4dc6d42 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -212,8 +212,7 @@ static int intel_lr_context_pin(struct intel_engine_cs 
*ring,
  * @enable_execlists: value of i915.enable_execlists module parameter.
  *
  * Only certain platforms support Execlists (the prerequisites being
- * support for Logical Ring Contexts and Aliasing PPGTT or better),
- * and only when enabled via module parameter.
+ * support for Logical Ring Contexts and Aliasing PPGTT or better).
  *
  * Return: 1 if Execlists is supported and has to be enabled.
  */
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2] drm/i915/bdw: Enable execlists by default where supported

2014-12-11 Thread Thomas Daniel
Execlist support in the i915 driver is now considered good enough for the
feature to be enabled by default on Gen8 and later and routinely tested.
Adjusted i915 parameters structure initialization to reflect this and updated
the comment in intel_sanitize_enable_execlists().

v2: Update the MODULE_PARM_DESC too.

Issue: VIZ-2020
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_params.c |4 ++--
 drivers/gpu/drm/i915/intel_lrc.c   |3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_params.c 
b/drivers/gpu/drm/i915/i915_params.c
index c91cb20..f6af6d4 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -35,7 +35,7 @@ struct i915_params i915 __read_mostly = {
.vbt_sdvo_panel_type = -1,
.enable_rc6 = -1,
.enable_fbc = -1,
-   .enable_execlists = 0,
+   .enable_execlists = -1,
.enable_hangcheck = true,
.enable_ppgtt = -1,
.enable_psr = 0,
@@ -122,7 +122,7 @@ MODULE_PARM_DESC(enable_ppgtt,
 module_param_named(enable_execlists, i915.enable_execlists, int, 0400);
 MODULE_PARM_DESC(enable_execlists,
Override execlists usage. 
-   (-1=auto, 0=disabled [default], 1=enabled));
+   (-1=auto [default], 0=disabled, 1=enabled));
 
 module_param_named(enable_psr, i915.enable_psr, int, 0600);
 MODULE_PARM_DESC(enable_psr, Enable PSR (default: false));
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 89b5577..4dc6d42 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -212,8 +212,7 @@ static int intel_lr_context_pin(struct intel_engine_cs 
*ring,
  * @enable_execlists: value of i915.enable_execlists module parameter.
  *
  * Only certain platforms support Execlists (the prerequisites being
- * support for Logical Ring Contexts and Aliasing PPGTT or better),
- * and only when enabled via module parameter.
+ * support for Logical Ring Contexts and Aliasing PPGTT or better).
  *
  * Return: 1 if Execlists is supported and has to be enabled.
  */
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Fix startup failure in LRC mode after recent init changes

2014-12-02 Thread Thomas Daniel
A previous commit introduced engine init changes:

commit 372ee59699d9 (drm/i915: Only init engines once)

This broke execlists as intel_lr_context_render_state_init was trying to emit
commands to the RCS for the default context before the ring-init_hw was called.

Made a new gen8_init_rcs_context function and assign in to render ring
init_context.  Moved call to intel_logical_ring_workarounds_emit into
gen8_init_rcs_context to maintain previous functionality.

Moved call to render_state_init from lr_context_deferred_create into
gen8_init_rcs_context, and modified deferred_create to call ring-init_context
for non-default contexts.

Modified i915_gem_context_enable to call ring-init_context for the default
context.

So init_context will now always be called when the hw is ready - in
i915_gem_context_enable for the default context and in 
lr_context_deferred_create
for other contexts.

Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_context.c |   25 ++---
 drivers/gpu/drm/i915/intel_lrc.c|   30 +++---
 2 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 3c3a9ff..5cd2b97 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -408,14 +408,25 @@ int i915_gem_context_enable(struct drm_i915_private 
*dev_priv)
 
BUG_ON(!dev_priv-ring[RCS].default_context);
 
-   if (i915.enable_execlists)
-   return 0;
+   if (i915.enable_execlists) {
+   for_each_ring(ring, dev_priv, i) {
+   if (ring-init_context) {
+   ret = ring-init_context(ring,
+   ring-default_context);
+   if (ret) {
+   DRM_ERROR(ring init context: %d\n,
+   ret);
+   return ret;
+   }
+   }
+   }
 
-   for_each_ring(ring, dev_priv, i) {
-   ret = i915_switch_context(ring, ring-default_context);
-   if (ret)
-   return ret;
-   }
+   } else
+   for_each_ring(ring, dev_priv, i) {
+   ret = i915_switch_context(ring, ring-default_context);
+   if (ret)
+   return ret;
+   }
 
return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4ffb08c..79ef40c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1336,6 +1336,18 @@ static int gen8_emit_request(struct intel_ringbuffer 
*ringbuf)
return 0;
 }
 
+static int gen8_init_rcs_context(struct intel_engine_cs *ring,
+  struct intel_context *ctx)
+{
+   int ret;
+
+   ret = intel_logical_ring_workarounds_emit(ring, ctx);
+   if (ret)
+   return ret;
+
+   return intel_lr_context_render_state_init(ring, ctx);
+}
+
 /**
  * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
  *
@@ -1409,7 +1421,7 @@ static int logical_render_ring_init(struct drm_device 
*dev)
ring-irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
 
ring-init_hw = gen8_init_render_ring;
-   ring-init_context = intel_logical_ring_workarounds_emit;
+   ring-init_context = gen8_init_rcs_context;
ring-cleanup = intel_fini_pipe_control;
ring-get_seqno = gen8_get_seqno;
ring-set_seqno = gen8_set_seqno;
@@ -1905,21 +1917,17 @@ int intel_lr_context_deferred_create(struct 
intel_context *ctx,
 
if (ctx == ring-default_context)
lrc_setup_hardware_status_page(ring, ctx_obj);
-
-   if (ring-id == RCS  !ctx-rcs_initialized) {
+   else if (ring-id == RCS  !ctx-rcs_initialized) {
if (ring-init_context) {
ret = ring-init_context(ring, ctx);
-   if (ret)
+   if (ret) {
DRM_ERROR(ring init context: %d\n, ret);
+   ctx-engine[ring-id].ringbuf = NULL;
+   ctx-engine[ring-id].state = NULL;
+   goto error;
+   }
}
 
-   ret = intel_lr_context_render_state_init(ring, ctx);
-   if (ret) {
-   DRM_ERROR(Init render state failed: %d\n, ret);
-   ctx-engine[ring-id].ringbuf = NULL;
-   ctx-engine[ring-id].state = NULL;
-   goto error;
-   }
ctx-rcs_initialized = true;
}
 
-- 
1.7.9.5

___
Intel

[Intel-gfx] [PATCH] drm/i915: Don't pin LRC in GGTT when dumping in debugfs

2014-12-02 Thread Thomas Daniel
LRC object does not need to be mapped into the GGTT when dumping. A side-effect
of this patch is that a compiler warning goes away (not checking return value
of i915_gem_obj_ggtt_pin).

v2: Broke out individual context dumping into a new function as the indentation
was getting a bit crazy.  Added notification of contexts with no gem object for
debugging purposes.  Removed unnecessary pin_pages and unpin_pages, replaced
with explicit get_pages for the context object as there may be no backing store
allocated at this time (Comment for get_pages says "Ensure that the associated
pages are gathered from the backing storage and pinned into our object").
Improved error checking - get_pages and get_page are checked for failure.

Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c |   84 ---
 1 file changed, 49 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 7ea3843..e1de646 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1773,6 +1773,50 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
return 0;
 }
 
+static void i915_dump_lrc_obj(struct seq_file *m,
+   struct intel_engine_cs *ring,
+   struct drm_i915_gem_object *ctx_obj)
+{
+   struct page *page;
+   uint32_t *reg_state;
+   int j;
+   unsigned long ggtt_offset = 0;
+
+   if (ctx_obj == NULL) {
+   seq_printf(m, Context on %s with no gem object\n,
+   ring-name);
+   return;
+   }
+
+   seq_printf(m, CONTEXT: %s %u\n, ring-name,
+   intel_execlists_ctx_id(ctx_obj));
+
+   if (!i915_gem_obj_ggtt_bound(ctx_obj))
+   seq_puts(m, \tNot bound in GGTT\n);
+   else
+   ggtt_offset = i915_gem_obj_ggtt_offset(ctx_obj);
+
+   if (i915_gem_object_get_pages(ctx_obj)) {
+   seq_puts(m, \tFailed to get pages for context object\n);
+   return;
+   }
+
+   page = i915_gem_object_get_page(ctx_obj, 1);
+   if (!WARN_ON(page == NULL)) {
+   reg_state = kmap_atomic(page);
+
+   for (j = 0; j  0x600 / sizeof(u32) / 4; j += 4) {
+   seq_printf(m, \t[0x%08lx] 0x%08x 0x%08x 0x%08x 
0x%08x\n,
+   ggtt_offset + 4096 + (j * 4),
+   reg_state[j], reg_state[j + 1],
+   reg_state[j + 2], reg_state[j + 3]);
+   }
+   kunmap_atomic(reg_state);
+   }
+
+   seq_putc(m, '\n');
+}
+
 static int i915_dump_lrc(struct seq_file *m, void *unused)
 {
struct drm_info_node *node = (struct drm_info_node *) m-private;
@@ -1791,41 +1835,11 @@ static int i915_dump_lrc(struct seq_file *m, void 
*unused)
if (ret)
return ret;
 
-   list_for_each_entry(ctx, dev_priv-context_list, link) {
-   for_each_ring(ring, dev_priv, i) {
-   struct drm_i915_gem_object *ctx_obj = 
ctx-engine[i].state;
-
-   if (ring-default_context == ctx)
-   continue;
-
-   if (ctx_obj) {
-   struct page *page;
-   uint32_t *reg_state;
-   int j;
-
-   i915_gem_obj_ggtt_pin(ctx_obj,
-   GEN8_LR_CONTEXT_ALIGN, 0);
-
-   page = i915_gem_object_get_page(ctx_obj, 1);
-   reg_state = kmap_atomic(page);
-
-   seq_printf(m, CONTEXT: %s %u\n, ring-name,
-   
intel_execlists_ctx_id(ctx_obj));
-
-   for (j = 0; j  0x600 / sizeof(u32) / 4; j += 
4) {
-   seq_printf(m, \t[0x%08lx] 0x%08x 
0x%08x 0x%08x 0x%08x\n,
-   i915_gem_obj_ggtt_offset(ctx_obj) + 
4096 + (j * 4),
-   reg_state[j], reg_state[j + 1],
-   reg_state[j + 2], reg_state[j + 3]);
-   }
-   kunmap_atomic(reg_state);
-
-   i915_gem_object_ggtt_unpin(ctx_obj);
-
-   seq_putc(m, '\n');
-   }
-   }
-   }
+   list_for_each_entry(ctx, dev_priv-context_list, link)
+   for_each_ring(ring, dev_priv, i)
+   if (ring-default_context != ctx)
+   i915_dump_lrc_obj(m, ring,
+   ctx-engine[i].state);
 
mutex_unlock(dev-struct_mutex);
 
-- 
1.7.9.5

[Intel-gfx] [PATCH] drm/i915: Fix context object leak for legacy contexts

2014-11-25 Thread Thomas Daniel
Dynamic context pinning for LRCs introduced a leak in legacy mode.
Reinstate context unreference in i915_gem_free_request for legacy contexts.

Leak reported by i-g-t/drv_module_reload fixed by this patch.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86507
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c |   10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 614bc2b..c630d49 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2574,11 +2574,13 @@ static void i915_gem_free_request(struct 
drm_i915_gem_request *request)
list_del(request-list);
i915_gem_request_remove_from_client(request);
 
-   if (i915.enable_execlists  ctx) {
-   struct intel_engine_cs *ring = request-ring;
+   if (ctx) {
+   if (i915.enable_execlists) {
+   struct intel_engine_cs *ring = request-ring;
 
-   if (ctx != ring-default_context)
-   intel_lr_context_unpin(ring, ctx);
+   if (ctx != ring-default_context)
+   intel_lr_context_unpin(ring, ctx);
+   }
i915_gem_context_unreference(ctx);
}
kfree(request);
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Don't pin LRC in GGTT when dumping in debugfs

2014-11-20 Thread Thomas Daniel
LRC object does not need to be mapped into the GGTT when dumping. Just use
pin_pages. A side-effect of this patch is that a compiler warning goes away
(not checking return value of i915_gem_obj_ggtt_pin).

Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c |   19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index f91e7f7..7e1e7f7 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1801,25 +1801,30 @@ static int i915_dump_lrc(struct seq_file *m, void 
*unused)
struct page *page;
uint32_t *reg_state;
int j;
+   unsigned long ggtt_offset = 0;
 
-   i915_gem_obj_ggtt_pin(ctx_obj,
-   GEN8_LR_CONTEXT_ALIGN, 0);
-
-   page = i915_gem_object_get_page(ctx_obj, 1);
-   reg_state = kmap_atomic(page);
+   i915_gem_object_pin_pages(ctx_obj);
 
seq_printf(m, CONTEXT: %s %u\n, ring-name,

intel_execlists_ctx_id(ctx_obj));
 
+   if (!i915_gem_obj_ggtt_bound(ctx_obj))
+   seq_puts(m, \tNot bound in GGTT\n);
+   else
+   ggtt_offset = 
i915_gem_obj_ggtt_offset(ctx_obj);
+
+   page = i915_gem_object_get_page(ctx_obj, 1);
+   reg_state = kmap_atomic(page);
+
for (j = 0; j  0x600 / sizeof(u32) / 4; j += 
4) {
seq_printf(m, \t[0x%08lx] 0x%08x 
0x%08x 0x%08x 0x%08x\n,
-   i915_gem_obj_ggtt_offset(ctx_obj) + 
4096 + (j * 4),
+   ggtt_offset + 4096 + (j * 4),
reg_state[j], reg_state[j + 1],
reg_state[j + 2], reg_state[j + 3]);
}
kunmap_atomic(reg_state);
 
-   i915_gem_object_ggtt_unpin(ctx_obj);
+   i915_gem_object_unpin_pages(ctx_obj);
 
seq_putc(m, '\n');
}
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v5 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work

2014-11-13 Thread Thomas Daniel
No longer create a work item to clean each execlist queue item.
Instead, move retired execlist requests to a queue and clean up the
items during retire_requests.

v2: Fix legacy ring path broken during overzealous cleanup

v3: Update idle detection to take execlists queue into account

v4: Grab execlist lock when checking queue state

v5: Fix leaking requests by freeing in execlists_retire_requests.

Issue: VIZ-4274
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c |9 ++
 drivers/gpu/drm/i915/intel_lrc.c|   53 ++-
 drivers/gpu/drm/i915/intel_lrc.h|2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |1 +
 4 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 827edb5..408afe7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2718,6 +2718,15 @@ i915_gem_retire_requests(struct drm_device *dev)
for_each_ring(ring, dev_priv, i) {
i915_gem_retire_requests_ring(ring);
idle = list_empty(ring-request_list);
+   if (i915.enable_execlists) {
+   unsigned long flags;
+
+   spin_lock_irqsave(ring-execlist_lock, flags);
+   idle = list_empty(ring-execlist_queue);
+   spin_unlock_irqrestore(ring-execlist_lock, flags);
+
+   intel_execlists_retire_requests(ring);
+   }
}
 
if (idle)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cd74e5c..d920297 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -386,7 +386,6 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 {
struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
-   struct drm_i915_private *dev_priv = ring-dev-dev_private;
 
assert_spin_locked(ring-execlist_lock);
 
@@ -403,7 +402,8 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 * will update tail past first request's workload */
cursor-elsp_submitted = req0-elsp_submitted;
list_del(req0-execlist_link);
-   queue_work(dev_priv-wq, req0-work);
+   list_add_tail(req0-execlist_link,
+   ring-execlist_retired_req_list);
req0 = cursor;
} else {
req1 = cursor;
@@ -425,7 +425,6 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 static bool execlists_check_remove_request(struct intel_engine_cs *ring,
   u32 request_id)
 {
-   struct drm_i915_private *dev_priv = ring-dev-dev_private;
struct intel_ctx_submit_request *head_req;
 
assert_spin_locked(ring-execlist_lock);
@@ -443,7 +442,8 @@ static bool execlists_check_remove_request(struct 
intel_engine_cs *ring,
 
if (--head_req-elsp_submitted = 0) {
list_del(head_req-execlist_link);
-   queue_work(dev_priv-wq, head_req-work);
+   list_add_tail(head_req-execlist_link,
+   ring-execlist_retired_req_list);
return true;
}
}
@@ -512,22 +512,6 @@ void intel_execlists_handle_ctx_events(struct 
intel_engine_cs *ring)
   ((u32)ring-next_context_status_buffer  0x07)  8);
 }
 
-static void execlists_free_request_task(struct work_struct *work)
-{
-   struct intel_ctx_submit_request *req =
-   container_of(work, struct intel_ctx_submit_request, work);
-   struct drm_device *dev = req-ring-dev;
-   struct drm_i915_private *dev_priv = dev-dev_private;
-
-   intel_runtime_pm_put(dev_priv);
-
-   mutex_lock(dev-struct_mutex);
-   i915_gem_context_unreference(req-ctx);
-   mutex_unlock(dev-struct_mutex);
-
-   kfree(req);
-}
-
 static int execlists_context_queue(struct intel_engine_cs *ring,
   struct intel_context *to,
   u32 tail)
@@ -544,7 +528,6 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
i915_gem_context_reference(req-ctx);
req-ring = ring;
req-tail = tail;
-   INIT_WORK(req-work, execlists_free_request_task);
 
intel_runtime_pm_get(dev_priv);
 
@@ -565,7 +548,8 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
WARN(tail_req-elsp_submitted != 0,
 More than 2 already-submitted reqs queued\n);
list_del(tail_req

[Intel-gfx] [PATCH v5 4/4] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand

2014-11-13 Thread Thomas Daniel
Same as with the context, pinning to GGTT regardless is harmful (it
badly fragments the GGTT and can even exhaust it).

Unfortunately, this case is also more complex than the previous one
because we need to map and access the ringbuffer in several places
along the execbuffer path (and we cannot make do by leaving the
default ringbuffer pinned, as before). Also, the context object
itself contains a pointer to the ringbuffer address that we have to
keep updated if we are going to allow the ringbuffer to move around.

v2: Same as with the context pinning, we cannot really do it during
an interrupt. Also, pin the default ringbuffers objects regardless
(makes error capture a lot easier).

v3: Rebased. Take a pin reference of the ringbuffer for each item
in the execlist request queue because the hardware may still be using
the ringbuffer after the MI_USER_INTERRUPT to notify the seqno update
is executed.  The ringbuffer must remain pinned until the context save
is complete.  No longer pin and unpin ringbuffer in
populate_lr_context() - this transient address is meaningless and the
pinning can cause a sleep while atomic.

v4: Moved ringbuffer pin and unpin into the lr_context_pin functions.
Downgraded pinning check BUG_ONs to WARN_ONs.

v5: Reinstated WARN_ONs for unexpected execlist states.  Removed unused
variable.

Issue: VIZ-4277
Signed-off-by: Oscar Mateo oscar.ma...@intel.com
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c|  102 +++
 drivers/gpu/drm/i915/intel_ringbuffer.c |   85 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.h |3 +
 3 files changed, 128 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f7fa0f7..ca20f91 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -202,6 +202,9 @@ enum {
 };
 #define GEN8_CTX_ID_SHIFT 32
 
+static int intel_lr_context_pin(struct intel_engine_cs *ring,
+   struct intel_context *ctx);
+
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
  * @dev: DRM device.
@@ -339,7 +342,9 @@ static void execlists_elsp_write(struct intel_engine_cs 
*ring,
spin_unlock_irqrestore(dev_priv-uncore.lock, flags);
 }
 
-static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 
tail)
+static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
+   struct drm_i915_gem_object *ring_obj,
+   u32 tail)
 {
struct page *page;
uint32_t *reg_state;
@@ -348,6 +353,7 @@ static int execlists_ctx_write_tail(struct 
drm_i915_gem_object *ctx_obj, u32 tai
reg_state = kmap_atomic(page);
 
reg_state[CTX_RING_TAIL+1] = tail;
+   reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
 
kunmap_atomic(reg_state);
 
@@ -358,21 +364,25 @@ static int execlists_submit_context(struct 
intel_engine_cs *ring,
struct intel_context *to0, u32 tail0,
struct intel_context *to1, u32 tail1)
 {
-   struct drm_i915_gem_object *ctx_obj0;
+   struct drm_i915_gem_object *ctx_obj0 = to0-engine[ring-id].state;
+   struct intel_ringbuffer *ringbuf0 = to0-engine[ring-id].ringbuf;
struct drm_i915_gem_object *ctx_obj1 = NULL;
+   struct intel_ringbuffer *ringbuf1 = NULL;
 
-   ctx_obj0 = to0-engine[ring-id].state;
BUG_ON(!ctx_obj0);
WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
+   WARN_ON(!i915_gem_obj_is_pinned(ringbuf0-obj));
 
-   execlists_ctx_write_tail(ctx_obj0, tail0);
+   execlists_update_context(ctx_obj0, ringbuf0-obj, tail0);
 
if (to1) {
+   ringbuf1 = to1-engine[ring-id].ringbuf;
ctx_obj1 = to1-engine[ring-id].state;
BUG_ON(!ctx_obj1);
WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
+   WARN_ON(!i915_gem_obj_is_pinned(ringbuf1-obj));
 
-   execlists_ctx_write_tail(ctx_obj1, tail1);
+   execlists_update_context(ctx_obj1, ringbuf1-obj, tail1);
}
 
execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
@@ -524,6 +534,10 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
return -ENOMEM;
req-ctx = to;
i915_gem_context_reference(req-ctx);
+
+   if (to != ring-default_context)
+   intel_lr_context_pin(ring, to);
+
req-ring = ring;
req-tail = tail;
 
@@ -544,7 +558,7 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
 
if (to == tail_req-ctx) {
WARN(tail_req-elsp_submitted != 0,
-More than 2 already-submitted reqs queued\n);
+   More than 2 already-submitted reqs queued\n

[Intel-gfx] [PATCH v5 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand

2014-11-13 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Up until now, we have pinned every logical ring context backing object
during creation, and left it pinned until destruction. This made my life
easier, but it's a harmful thing to do, because we cause fragmentation
of the GGTT (and, eventually, we would run out of space).

This patch makes the pinning on-demand: the backing objects of the two
contexts that are written to the ELSP are pinned right before submission
and unpinned once the hardware is done with them. The only context that
is still pinned regardless is the global default one, so that the HWS can
still be accessed in the same way (ring-status_page).

v2: In the early version of this patch, we were pinning the context as
we put it into the ELSP: on the one hand, this is very efficient because
only a maximum two contexts are pinned at any given time, but on the other
hand, we cannot really pin in interrupt time :(

v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
Do not unpin default context in free_request.

v4: Break out pin and unpin into functions.  Fix style problems reported
by checkpatch

v5: Remove unpin_lock as all pinning and unpinning is done with the struct
mutex already locked.  Add WARN_ONs to make sure this is the case in future.

Issue: VIZ-4277
Signed-off-by: Oscar Mateo oscar.ma...@intel.com
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c |   12 +-
 drivers/gpu/drm/i915/i915_drv.h |1 +
 drivers/gpu/drm/i915/i915_gem.c |   39 +---
 drivers/gpu/drm/i915/intel_lrc.c|   69 +--
 drivers/gpu/drm/i915/intel_lrc.h|4 ++
 5 files changed, 98 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index e60d5c2..6eaf813 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1799,10 +1799,16 @@ static int i915_dump_lrc(struct seq_file *m, void 
*unused)
continue;
 
if (ctx_obj) {
-   struct page *page = 
i915_gem_object_get_page(ctx_obj, 1);
-   uint32_t *reg_state = kmap_atomic(page);
+   struct page *page;
+   uint32_t *reg_state;
int j;
 
+   i915_gem_obj_ggtt_pin(ctx_obj,
+   GEN8_LR_CONTEXT_ALIGN, 0);
+
+   page = i915_gem_object_get_page(ctx_obj, 1);
+   reg_state = kmap_atomic(page);
+
seq_printf(m, CONTEXT: %s %u\n, ring-name,

intel_execlists_ctx_id(ctx_obj));
 
@@ -1814,6 +1820,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
}
kunmap_atomic(reg_state);
 
+   i915_gem_object_ggtt_unpin(ctx_obj);
+
seq_putc(m, '\n');
}
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 059330c..3c7299d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -655,6 +655,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   int unpin_count;
} engine[I915_NUM_RINGS];
 
struct list_head link;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 408afe7..2ee6996 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2494,12 +2494,18 @@ static void i915_set_reset_status(struct 
drm_i915_private *dev_priv,
 
 static void i915_gem_free_request(struct drm_i915_gem_request *request)
 {
+   struct intel_context *ctx = request-ctx;
+
list_del(request-list);
i915_gem_request_remove_from_client(request);
 
-   if (request-ctx)
-   i915_gem_context_unreference(request-ctx);
+   if (i915.enable_execlists  ctx) {
+   struct intel_engine_cs *ring = request-ring;
 
+   if (ctx != ring-default_context)
+   intel_lr_context_unpin(ring, ctx);
+   i915_gem_context_unreference(ctx);
+   }
kfree(request);
 }
 
@@ -2554,6 +2560,23 @@ static void i915_gem_reset_ring_cleanup(struct 
drm_i915_private *dev_priv,
}
 
/*
+* Clear the execlists queue up before freeing the requests, as those
+* are the ones that keep the context and ringbuffer backing objects
+* pinned in place.
+*/
+   while (!list_empty(ring-execlist_queue)) {
+   struct intel_ctx_submit_request *submit_req

[Intel-gfx] [PATCH v4 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work

2014-11-07 Thread Thomas Daniel
No longer create a work item to clean each execlist queue item.
Instead, move retired execlist requests to a queue and clean up the
items during retire_requests.

v2: Fix legacy ring path broken during overzealous cleanup

v3: Update idle detection to take execlists queue into account

v4: Grab execlist lock when checking queue state

Issue: VIZ-4274
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c |9 ++
 drivers/gpu/drm/i915/intel_lrc.c|   52 ++-
 drivers/gpu/drm/i915/intel_lrc.h|2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |1 +
 4 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 827edb5..408afe7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2718,6 +2718,15 @@ i915_gem_retire_requests(struct drm_device *dev)
for_each_ring(ring, dev_priv, i) {
i915_gem_retire_requests_ring(ring);
idle = list_empty(ring-request_list);
+   if (i915.enable_execlists) {
+   unsigned long flags;
+
+   spin_lock_irqsave(ring-execlist_lock, flags);
+   idle = list_empty(ring-execlist_queue);
+   spin_unlock_irqrestore(ring-execlist_lock, flags);
+
+   intel_execlists_retire_requests(ring);
+   }
}
 
if (idle)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cd74e5c..87ce445 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -386,7 +386,6 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 {
struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
-   struct drm_i915_private *dev_priv = ring-dev-dev_private;
 
assert_spin_locked(ring-execlist_lock);
 
@@ -403,7 +402,8 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 * will update tail past first request's workload */
cursor-elsp_submitted = req0-elsp_submitted;
list_del(req0-execlist_link);
-   queue_work(dev_priv-wq, req0-work);
+   list_add_tail(req0-execlist_link,
+   ring-execlist_retired_req_list);
req0 = cursor;
} else {
req1 = cursor;
@@ -425,7 +425,6 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 static bool execlists_check_remove_request(struct intel_engine_cs *ring,
   u32 request_id)
 {
-   struct drm_i915_private *dev_priv = ring-dev-dev_private;
struct intel_ctx_submit_request *head_req;
 
assert_spin_locked(ring-execlist_lock);
@@ -443,7 +442,8 @@ static bool execlists_check_remove_request(struct 
intel_engine_cs *ring,
 
if (--head_req-elsp_submitted = 0) {
list_del(head_req-execlist_link);
-   queue_work(dev_priv-wq, head_req-work);
+   list_add_tail(head_req-execlist_link,
+   ring-execlist_retired_req_list);
return true;
}
}
@@ -512,22 +512,6 @@ void intel_execlists_handle_ctx_events(struct 
intel_engine_cs *ring)
   ((u32)ring-next_context_status_buffer  0x07)  8);
 }
 
-static void execlists_free_request_task(struct work_struct *work)
-{
-   struct intel_ctx_submit_request *req =
-   container_of(work, struct intel_ctx_submit_request, work);
-   struct drm_device *dev = req-ring-dev;
-   struct drm_i915_private *dev_priv = dev-dev_private;
-
-   intel_runtime_pm_put(dev_priv);
-
-   mutex_lock(dev-struct_mutex);
-   i915_gem_context_unreference(req-ctx);
-   mutex_unlock(dev-struct_mutex);
-
-   kfree(req);
-}
-
 static int execlists_context_queue(struct intel_engine_cs *ring,
   struct intel_context *to,
   u32 tail)
@@ -544,7 +528,6 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
i915_gem_context_reference(req-ctx);
req-ring = ring;
req-tail = tail;
-   INIT_WORK(req-work, execlists_free_request_task);
 
intel_runtime_pm_get(dev_priv);
 
@@ -565,7 +548,8 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
WARN(tail_req-elsp_submitted != 0,
 More than 2 already-submitted reqs queued\n);
list_del(tail_req-execlist_link);
-   queue_work(dev_priv-wq

[Intel-gfx] [PATCH 4/4] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand

2014-10-29 Thread Thomas Daniel
Same as with the context, pinning to GGTT regardless is harmful (it
badly fragments the GGTT and can even exhaust it).

Unfortunately, this case is also more complex than the previous one
because we need to map and access the ringbuffer in several places
along the execbuffer path (and we cannot make do by leaving the
default ringbuffer pinned, as before). Also, the context object
itself contains a pointer to the ringbuffer address that we have to
keep updated if we are going to allow the ringbuffer to move around.

v2: Same as with the context pinning, we cannot really do it during
an interrupt. Also, pin the default ringbuffers objects regardless
(makes error capture a lot easier).

v3: Rebased. Take a pin reference of the ringbuffer for each item
in the execlist request queue because the hardware may still be using
the ringbuffer after the MI_USER_INTERRUPT to notify the seqno update
is executed.  The ringbuffer must remain pinned until the context save
is complete.  No longer pin and unpin ringbuffer in
populate_lr_context() - this transient address is meaningless and the
pinning can cause a sleep while atomic.

v4: Moved ringbuffer pin and unpin into the lr_context_pin functions.
Downgraded pinning check BUG_ONs to WARN_ONs.

Issue: VIZ-4277
Signed-off-by: Oscar Mateo oscar.ma...@intel.com
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c|  110 ++-
 drivers/gpu/drm/i915/intel_lrc.h|1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   85 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |3 +
 4 files changed, 133 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7950357..b5ae4fa 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -202,6 +202,9 @@ enum {
 };
 #define GEN8_CTX_ID_SHIFT 32
 
+static int intel_lr_context_pin(struct intel_engine_cs *ring,
+   struct intel_context *ctx);
+
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
  * @dev: DRM device.
@@ -339,7 +342,9 @@ static void execlists_elsp_write(struct intel_engine_cs 
*ring,
spin_unlock_irqrestore(dev_priv-uncore.lock, flags);
 }
 
-static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 
tail)
+static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
+   struct drm_i915_gem_object *ring_obj,
+   u32 tail)
 {
struct page *page;
uint32_t *reg_state;
@@ -348,6 +353,7 @@ static int execlists_ctx_write_tail(struct 
drm_i915_gem_object *ctx_obj, u32 tai
reg_state = kmap_atomic(page);
 
reg_state[CTX_RING_TAIL+1] = tail;
+   reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
 
kunmap_atomic(reg_state);
 
@@ -358,21 +364,25 @@ static int execlists_submit_context(struct 
intel_engine_cs *ring,
struct intel_context *to0, u32 tail0,
struct intel_context *to1, u32 tail1)
 {
-   struct drm_i915_gem_object *ctx_obj0;
+   struct drm_i915_gem_object *ctx_obj0 = to0-engine[ring-id].state;
+   struct intel_ringbuffer *ringbuf0 = to0-engine[ring-id].ringbuf;
struct drm_i915_gem_object *ctx_obj1 = NULL;
+   struct intel_ringbuffer *ringbuf1 = NULL;
 
-   ctx_obj0 = to0-engine[ring-id].state;
BUG_ON(!ctx_obj0);
WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
+   WARN_ON(!i915_gem_obj_is_pinned(ringbuf0-obj));
 
-   execlists_ctx_write_tail(ctx_obj0, tail0);
+   execlists_update_context(ctx_obj0, ringbuf0-obj, tail0);
 
if (to1) {
+   ringbuf1 = to1-engine[ring-id].ringbuf;
ctx_obj1 = to1-engine[ring-id].state;
BUG_ON(!ctx_obj1);
WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
+   WARN_ON(!i915_gem_obj_is_pinned(ringbuf1-obj));
 
-   execlists_ctx_write_tail(ctx_obj1, tail1);
+   execlists_update_context(ctx_obj1, ringbuf1-obj, tail1);
}
 
execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
@@ -435,9 +445,9 @@ static bool execlists_check_remove_request(struct 
intel_engine_cs *ring,
struct drm_i915_gem_object *ctx_obj =
head_req-ctx-engine[ring-id].state;
if (intel_execlists_ctx_id(ctx_obj) == request_id) {
-   WARN(head_req-elsp_submitted == 0,
-Never submitted head request\n);
 
+   /* If the request has been merged, it is possible to get
+* here with an unsubmitted request. */
if (--head_req-elsp_submitted = 0) {
list_del(head_req-execlist_link);
list_add_tail(head_req

[Intel-gfx] [PATCH 3/4] drm/i915/bdw: Pin the context backing objects to GGTT on-demand

2014-10-29 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Up until now, we have pinned every logical ring context backing object
during creation, and left it pinned until destruction. This made my life
easier, but it's a harmful thing to do, because we cause fragmentation
of the GGTT (and, eventually, we would run out of space).

This patch makes the pinning on-demand: the backing objects of the two
contexts that are written to the ELSP are pinned right before submission
and unpinned once the hardware is done with them. The only context that
is still pinned regardless is the global default one, so that the HWS can
still be accessed in the same way (ring-status_page).

v2: In the early version of this patch, we were pinning the context as
we put it into the ELSP: on the one hand, this is very efficient because
only a maximum two contexts are pinned at any given time, but on the other
hand, we cannot really pin in interrupt time :(

v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
Do not unpin default context in free_request.

v4: Break out pin and unpin into functions.  Fix style problems reported
by checkpatch

Issue: VIZ-4277
Signed-off-by: Oscar Mateo oscar.ma...@intel.com
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c |   12 +-
 drivers/gpu/drm/i915/i915_drv.h |2 +
 drivers/gpu/drm/i915/i915_gem.c |   39 ---
 drivers/gpu/drm/i915/intel_lrc.c|   73 +--
 drivers/gpu/drm/i915/intel_lrc.h|4 ++
 5 files changed, 103 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index e60d5c2..6eaf813 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1799,10 +1799,16 @@ static int i915_dump_lrc(struct seq_file *m, void 
*unused)
continue;
 
if (ctx_obj) {
-   struct page *page = 
i915_gem_object_get_page(ctx_obj, 1);
-   uint32_t *reg_state = kmap_atomic(page);
+   struct page *page;
+   uint32_t *reg_state;
int j;
 
+   i915_gem_obj_ggtt_pin(ctx_obj,
+   GEN8_LR_CONTEXT_ALIGN, 0);
+
+   page = i915_gem_object_get_page(ctx_obj, 1);
+   reg_state = kmap_atomic(page);
+
seq_printf(m, CONTEXT: %s %u\n, ring-name,

intel_execlists_ctx_id(ctx_obj));
 
@@ -1814,6 +1820,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
}
kunmap_atomic(reg_state);
 
+   i915_gem_object_ggtt_unpin(ctx_obj);
+
seq_putc(m, '\n');
}
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 059330c..632b88d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -655,6 +655,8 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   int unpin_count;
+   struct mutex unpin_lock;
} engine[I915_NUM_RINGS];
 
struct list_head link;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index df28202..8a00dea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2494,12 +2494,18 @@ static void i915_set_reset_status(struct 
drm_i915_private *dev_priv,
 
 static void i915_gem_free_request(struct drm_i915_gem_request *request)
 {
+   struct intel_context *ctx = request-ctx;
+
list_del(request-list);
i915_gem_request_remove_from_client(request);
 
-   if (request-ctx)
-   i915_gem_context_unreference(request-ctx);
+   if (i915.enable_execlists  ctx) {
+   struct intel_engine_cs *ring = request-ring;
 
+   if (ctx != ring-default_context)
+   intel_lr_context_unpin(ring, ctx);
+   i915_gem_context_unreference(ctx);
+   }
kfree(request);
 }
 
@@ -2554,6 +2560,23 @@ static void i915_gem_reset_ring_cleanup(struct 
drm_i915_private *dev_priv,
}
 
/*
+* Clear the execlists queue up before freeing the requests, as those
+* are the ones that keep the context and ringbuffer backing objects
+* pinned in place.
+*/
+   while (!list_empty(ring-execlist_queue)) {
+   struct intel_ctx_submit_request *submit_req;
+
+   submit_req = list_first_entry(ring-execlist_queue,
+   struct intel_ctx_submit_request

[Intel-gfx] [PATCH 2/4] drm/i915/bdw: Setup global hardware status page in execlists mode

2014-10-29 Thread Thomas Daniel
Write HWS_PGA address even in execlists mode as the global hardware status
page is still required.  This address was previously uninitialized and
HWSP writes would clobber whatever buffer happened to reside at GGTT
address 0.

v2: Break out hardware status page setup into a separate function.

Issue: VIZ-2020
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c |   34 ++
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 87ce445..6b8bf0d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1657,6 +1657,27 @@ static uint32_t get_lr_context_size(struct 
intel_engine_cs *ring)
return ret;
 }
 
+static int lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
+   struct drm_i915_gem_object *default_ctx_obj)
+{
+   struct drm_i915_private *dev_priv = ring->dev->dev_private;
+
+   /* The status page is offset 0 from the default context object
+    * in LRC mode. */
+   ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj);
+   ring->status_page.page_addr =
+   kmap(sg_page(default_ctx_obj->pages->sgl));
+   if (ring->status_page.page_addr == NULL)
+   return -ENOMEM;
+   ring->status_page.obj = default_ctx_obj;
+
+   I915_WRITE(RING_HWS_PGA(ring->mmio_base),
+   (u32)ring->status_page.gfx_addr);
+   POSTING_READ(RING_HWS_PGA(ring->mmio_base));
+
+   return 0;
+}
+
 /**
  * intel_lr_context_deferred_create() - create the LRC specific bits of a 
context
  * @ctx: LR context to create.
@@ -1742,14 +1763,11 @@ int intel_lr_context_deferred_create(struct 
intel_context *ctx,
ctx-engine[ring-id].state = ctx_obj;
 
if (ctx == ring-default_context) {
-   /* The status page is offset 0 from the default context object
-* in LRC mode. */
-   ring-status_page.gfx_addr = i915_gem_obj_ggtt_offset(ctx_obj);
-   ring-status_page.page_addr =
-   kmap(sg_page(ctx_obj-pages-sgl));
-   if (ring-status_page.page_addr == NULL)
-   return -ENOMEM;
-   ring-status_page.obj = ctx_obj;
+   ret = lrc_setup_hardware_status_page(ring, ctx_obj);
+   if (ret) {
+   DRM_ERROR("Failed to setup hardware status page\n");
+   goto error;
+   }
}
 
if (ring-id == RCS  !ctx-rcs_initialized) {
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/4] drm/i915/bdw: Clean up execlist queue items in retire_work

2014-10-29 Thread Thomas Daniel
No longer create a work item to clean each execlist queue item.
Instead, move retired execlist requests to a queue and clean up the
items during retire_requests.

v2: Fix legacy ring path broken during overzealous cleanup

v3: Update idle detection to take execlists queue into account

Issue: VIZ-4274
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c |4 +++
 drivers/gpu/drm/i915/intel_lrc.c|   52 ++-
 drivers/gpu/drm/i915/intel_lrc.h|2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |1 +
 4 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 827edb5..df28202 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2718,6 +2718,10 @@ i915_gem_retire_requests(struct drm_device *dev)
for_each_ring(ring, dev_priv, i) {
i915_gem_retire_requests_ring(ring);
idle = list_empty(ring-request_list);
+   if (i915.enable_execlists) {
+   idle = list_empty(ring-execlist_queue);
+   intel_execlists_retire_requests(ring);
+   }
}
 
if (idle)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cd74e5c..87ce445 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -386,7 +386,6 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 {
struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
-   struct drm_i915_private *dev_priv = ring-dev-dev_private;
 
assert_spin_locked(ring-execlist_lock);
 
@@ -403,7 +402,8 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 * will update tail past first request's workload */
cursor-elsp_submitted = req0-elsp_submitted;
list_del(req0-execlist_link);
-   queue_work(dev_priv-wq, req0-work);
+   list_add_tail(req0-execlist_link,
+   ring-execlist_retired_req_list);
req0 = cursor;
} else {
req1 = cursor;
@@ -425,7 +425,6 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 static bool execlists_check_remove_request(struct intel_engine_cs *ring,
   u32 request_id)
 {
-   struct drm_i915_private *dev_priv = ring-dev-dev_private;
struct intel_ctx_submit_request *head_req;
 
assert_spin_locked(ring-execlist_lock);
@@ -443,7 +442,8 @@ static bool execlists_check_remove_request(struct 
intel_engine_cs *ring,
 
if (--head_req-elsp_submitted = 0) {
list_del(head_req-execlist_link);
-   queue_work(dev_priv-wq, head_req-work);
+   list_add_tail(head_req-execlist_link,
+   ring-execlist_retired_req_list);
return true;
}
}
@@ -512,22 +512,6 @@ void intel_execlists_handle_ctx_events(struct 
intel_engine_cs *ring)
   ((u32)ring->next_context_status_buffer & 0x07) << 8);
 }
 
-static void execlists_free_request_task(struct work_struct *work)
-{
-   struct intel_ctx_submit_request *req =
-   container_of(work, struct intel_ctx_submit_request, work);
-   struct drm_device *dev = req-ring-dev;
-   struct drm_i915_private *dev_priv = dev-dev_private;
-
-   intel_runtime_pm_put(dev_priv);
-
-   mutex_lock(dev-struct_mutex);
-   i915_gem_context_unreference(req-ctx);
-   mutex_unlock(dev-struct_mutex);
-
-   kfree(req);
-}
-
 static int execlists_context_queue(struct intel_engine_cs *ring,
   struct intel_context *to,
   u32 tail)
@@ -544,7 +528,6 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
i915_gem_context_reference(req-ctx);
req-ring = ring;
req-tail = tail;
-   INIT_WORK(req-work, execlists_free_request_task);
 
intel_runtime_pm_get(dev_priv);
 
@@ -565,7 +548,8 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
WARN(tail_req->elsp_submitted != 0,
 "More than 2 already-submitted reqs queued\n");
list_del(&tail_req->execlist_link);
-   queue_work(dev_priv-wq, tail_req-work);
+   list_add_tail(tail_req-execlist_link,
+   ring-execlist_retired_req_list);
}
}
 
@@ -733,6 +717,29 @@ int intel_execlists_submission(struct drm_device *dev

[Intel-gfx] [PATCH] drm/i915/bdw: Setup global hardware status page in execlists mode

2014-10-24 Thread Thomas Daniel
Write HWS_PGA address even in execlists mode as the global hardware status
page is still required.  This address was previously uninitialized and
HWSP writes would clobber whatever buffer happened to reside at GGTT
address 0.

Issue: VIZ-2020
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 666cb28..ad36d66 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1678,6 +1678,7 @@ int intel_lr_context_deferred_create(struct intel_context 
*ctx,
uint32_t context_size;
struct intel_ringbuffer *ringbuf;
int ret;
+   struct drm_i915_private *dev_priv = dev-dev_private;
 
WARN_ON(ctx-legacy_hw_ctx.rcs_state != NULL);
if (ctx-engine[ring-id].state)
@@ -1750,6 +1751,10 @@ int intel_lr_context_deferred_create(struct 
intel_context *ctx,
if (ring-status_page.page_addr == NULL)
return -ENOMEM;
ring-status_page.obj = ctx_obj;
+
+   I915_WRITE(RING_HWS_PGA(ring-mmio_base),
+   (u32)ring-status_page.gfx_addr);
+   POSTING_READ(RING_HWS_PGA(ring-mmio_base));
}
 
if (ring-id == RCS  !ctx-rcs_initialized) {
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915/bdw: Setup global hardware status page in execlists mode

2014-10-23 Thread Thomas Daniel
Write HWS_PGA address even in execlists mode as the global hardware status
page is still required.  This address was previously uninitialized and
HWSP writes would clobber whatever buffer happened to reside at GGTT
address 0.

Issue: VIZ-2020
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 666cb28..ad36d66 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1678,6 +1678,7 @@ int intel_lr_context_deferred_create(struct intel_context 
*ctx,
uint32_t context_size;
struct intel_ringbuffer *ringbuf;
int ret;
+   struct drm_i915_private *dev_priv = dev-dev_private;
 
WARN_ON(ctx-legacy_hw_ctx.rcs_state != NULL);
if (ctx-engine[ring-id].state)
@@ -1750,6 +1751,10 @@ int intel_lr_context_deferred_create(struct 
intel_context *ctx,
if (ring-status_page.page_addr == NULL)
return -ENOMEM;
ring-status_page.obj = ctx_obj;
+
+   I915_WRITE(RING_HWS_PGA(ring-mmio_base),
+   (u32)ring-status_page.gfx_addr);
+   POSTING_READ(RING_HWS_PGA(ring-mmio_base));
}
 
if (ring-id == RCS  !ctx-rcs_initialized) {
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/bdw: Clean up execlist queue items in retire_work

2014-10-20 Thread Thomas Daniel
No longer create a work item to clean each execlist queue item.
Instead, move retired execlist requests to a queue and clean up the
items during retire_requests.

Issue: VIZ-4274
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c |1 +
 drivers/gpu/drm/i915/intel_lrc.c|   52 ++-
 drivers/gpu/drm/i915/intel_lrc.h|2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |1 +
 4 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 895f988..6a3e0ea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2718,6 +2718,7 @@ i915_gem_retire_requests(struct drm_device *dev)
for_each_ring(ring, dev_priv, i) {
i915_gem_retire_requests_ring(ring);
idle = list_empty(ring-request_list);
+   intel_execlists_retire_requests(ring);
}
 
if (idle)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 803fc38..666cb28 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -386,7 +386,6 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 {
struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
-   struct drm_i915_private *dev_priv = ring-dev-dev_private;
 
assert_spin_locked(ring-execlist_lock);
 
@@ -403,7 +402,8 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 * will update tail past first request's workload */
cursor-elsp_submitted = req0-elsp_submitted;
list_del(req0-execlist_link);
-   queue_work(dev_priv-wq, req0-work);
+   list_add_tail(req0-execlist_link,
+   ring-execlist_retired_req_list);
req0 = cursor;
} else {
req1 = cursor;
@@ -425,7 +425,6 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 static bool execlists_check_remove_request(struct intel_engine_cs *ring,
   u32 request_id)
 {
-   struct drm_i915_private *dev_priv = ring-dev-dev_private;
struct intel_ctx_submit_request *head_req;
 
assert_spin_locked(ring-execlist_lock);
@@ -443,7 +442,8 @@ static bool execlists_check_remove_request(struct 
intel_engine_cs *ring,
 
if (--head_req->elsp_submitted <= 0) {
list_del(&head_req->execlist_link);
-   queue_work(dev_priv->wq, &head_req->work);
+   list_add_tail(&head_req->execlist_link,
+   &ring->execlist_retired_req_list);
return true;
}
}
@@ -512,22 +512,6 @@ void intel_execlists_handle_ctx_events(struct 
intel_engine_cs *ring)
   ((u32)ring-next_context_status_buffer  0x07)  8);
 }
 
-static void execlists_free_request_task(struct work_struct *work)
-{
-   struct intel_ctx_submit_request *req =
-   container_of(work, struct intel_ctx_submit_request, work);
-   struct drm_device *dev = req-ring-dev;
-   struct drm_i915_private *dev_priv = dev-dev_private;
-
-   intel_runtime_pm_put(dev_priv);
-
-   mutex_lock(dev-struct_mutex);
-   i915_gem_context_unreference(req-ctx);
-   mutex_unlock(dev-struct_mutex);
-
-   kfree(req);
-}
-
 static int execlists_context_queue(struct intel_engine_cs *ring,
   struct intel_context *to,
   u32 tail)
@@ -544,7 +528,6 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
i915_gem_context_reference(req-ctx);
req-ring = ring;
req-tail = tail;
-   INIT_WORK(req-work, execlists_free_request_task);
 
intel_runtime_pm_get(dev_priv);
 
@@ -565,7 +548,8 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
WARN(tail_req-elsp_submitted != 0,
 More than 2 already-submitted reqs queued\n);
list_del(tail_req-execlist_link);
-   queue_work(dev_priv-wq, tail_req-work);
+   list_add_tail(tail_req-execlist_link,
+   ring-execlist_retired_req_list);
}
}
 
@@ -733,6 +717,29 @@ int intel_execlists_submission(struct drm_device *dev, 
struct drm_file *file,
return 0;
 }
 
+void intel_execlists_retire_requests(struct intel_engine_cs *ring)
+{
+   struct intel_ctx_submit_request *req, *tmp;
+   struct drm_i915_private *dev_priv = ring-dev-dev_private;
+   unsigned long

[Intel-gfx] [PATCH] drm/i915/bdw: Clean up execlist queue items in retire_work

2014-10-20 Thread Thomas Daniel
No longer create a work item to clean each execlist queue item.
Instead, move retired execlist requests to a queue and clean up the
items during retire_requests.

v2: Fix legacy ring path broken during overzealous cleanup

Issue: VIZ-4274
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c |2 ++
 drivers/gpu/drm/i915/intel_lrc.c|   52 ++-
 drivers/gpu/drm/i915/intel_lrc.h|2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |1 +
 4 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 895f988..65cfe9a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2718,6 +2718,8 @@ i915_gem_retire_requests(struct drm_device *dev)
for_each_ring(ring, dev_priv, i) {
i915_gem_retire_requests_ring(ring);
idle = list_empty(ring-request_list);
+   if (i915.enable_execlists)
+   intel_execlists_retire_requests(ring);
}
 
if (idle)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 803fc38..666cb28 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -386,7 +386,6 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 {
struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
-   struct drm_i915_private *dev_priv = ring-dev-dev_private;
 
assert_spin_locked(ring-execlist_lock);
 
@@ -403,7 +402,8 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 * will update tail past first request's workload */
cursor-elsp_submitted = req0-elsp_submitted;
list_del(req0-execlist_link);
-   queue_work(dev_priv-wq, req0-work);
+   list_add_tail(req0-execlist_link,
+   ring-execlist_retired_req_list);
req0 = cursor;
} else {
req1 = cursor;
@@ -425,7 +425,6 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
 static bool execlists_check_remove_request(struct intel_engine_cs *ring,
   u32 request_id)
 {
-   struct drm_i915_private *dev_priv = ring-dev-dev_private;
struct intel_ctx_submit_request *head_req;
 
assert_spin_locked(ring-execlist_lock);
@@ -443,7 +442,8 @@ static bool execlists_check_remove_request(struct 
intel_engine_cs *ring,
 
if (--head_req-elsp_submitted = 0) {
list_del(head_req-execlist_link);
-   queue_work(dev_priv-wq, head_req-work);
+   list_add_tail(head_req-execlist_link,
+   ring-execlist_retired_req_list);
return true;
}
}
@@ -512,22 +512,6 @@ void intel_execlists_handle_ctx_events(struct 
intel_engine_cs *ring)
   ((u32)ring-next_context_status_buffer  0x07)  8);
 }
 
-static void execlists_free_request_task(struct work_struct *work)
-{
-   struct intel_ctx_submit_request *req =
-   container_of(work, struct intel_ctx_submit_request, work);
-   struct drm_device *dev = req-ring-dev;
-   struct drm_i915_private *dev_priv = dev-dev_private;
-
-   intel_runtime_pm_put(dev_priv);
-
-   mutex_lock(dev-struct_mutex);
-   i915_gem_context_unreference(req-ctx);
-   mutex_unlock(dev-struct_mutex);
-
-   kfree(req);
-}
-
 static int execlists_context_queue(struct intel_engine_cs *ring,
   struct intel_context *to,
   u32 tail)
@@ -544,7 +528,6 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
i915_gem_context_reference(req-ctx);
req-ring = ring;
req-tail = tail;
-   INIT_WORK(req-work, execlists_free_request_task);
 
intel_runtime_pm_get(dev_priv);
 
@@ -565,7 +548,8 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
WARN(tail_req-elsp_submitted != 0,
 More than 2 already-submitted reqs queued\n);
list_del(tail_req-execlist_link);
-   queue_work(dev_priv-wq, tail_req-work);
+   list_add_tail(tail_req-execlist_link,
+   ring-execlist_retired_req_list);
}
}
 
@@ -733,6 +717,29 @@ int intel_execlists_submission(struct drm_device *dev, 
struct drm_file *file,
return 0;
 }
 
+void intel_execlists_retire_requests(struct intel_engine_cs *ring)
+{
+   struct

[Intel-gfx] [PATCH] drm/i915/bdw: Render state init for Execlists

2014-08-21 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

The batchbuffer that sets the render context state is submitted
in a different way, and from different places.

We needed to make both the render state preparation and free functions
outside accessible, and namespace accordingly. This mess is so that all
LR, LRC and Execlists functionality can go together in intel_lrc.c: we
can fix all of this later on, once the interfaces are clear.

v2: Create a separate ctx-rcs_initialized for the Execlists case, as
suggested by Chris Wilson.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com

v3: Setup ring status page in lr_context_deferred_create when the
default context is being created. This means that the render state
init for the default context is no longer a special case.  Execute
deferred creation of the default context at the end of
logical_ring_init to allow the render state commands to be submitted.
Fix style errors reported by checkpatch. Rebased.

Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h  |4 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c |   40 --
 drivers/gpu/drm/i915/i915_gem_render_state.h |   47 +
 drivers/gpu/drm/i915/intel_lrc.c |   73 --
 drivers/gpu/drm/i915/intel_lrc.h |2 +
 drivers/gpu/drm/i915/intel_renderstate.h |8 +--
 6 files changed, 135 insertions(+), 39 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_render_state.h

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e449f81..f416e341 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -37,6 +37,7 @@
 #include intel_ringbuffer.h
 #include intel_lrc.h
 #include i915_gem_gtt.h
+#include i915_gem_render_state.h
 #include linux/io-mapping.h
 #include linux/i2c.h
 #include linux/i2c-algo-bit.h
@@ -635,6 +636,7 @@ struct intel_context {
} legacy_hw_ctx;
 
/* Execlists */
+   bool rcs_initialized;
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
@@ -2596,8 +2598,6 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
   struct drm_file *file);
 
-/* i915_gem_render_state.c */
-int i915_gem_render_state_init(struct intel_engine_cs *ring);
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct drm_device *dev,
  struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index e60be3f..a9a62d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -28,13 +28,6 @@
 #include i915_drv.h
 #include intel_renderstate.h
 
-struct render_state {
-   const struct intel_renderstate_rodata *rodata;
-   struct drm_i915_gem_object *obj;
-   u64 ggtt_offset;
-   int gen;
-};
-
 static const struct intel_renderstate_rodata *
 render_state_get_rodata(struct drm_device *dev, const int gen)
 {
@@ -127,30 +120,47 @@ static int render_state_setup(struct render_state *so)
return 0;
 }
 
-static void render_state_fini(struct render_state *so)
+void i915_gem_render_state_fini(struct render_state *so)
 {
i915_gem_object_ggtt_unpin(so-obj);
drm_gem_object_unreference(so-obj-base);
 }
 
-int i915_gem_render_state_init(struct intel_engine_cs *ring)
+int i915_gem_render_state_prepare(struct intel_engine_cs *ring,
+ struct render_state *so)
 {
-   struct render_state so;
int ret;
 
if (WARN_ON(ring-id != RCS))
return -ENOENT;
 
-   ret = render_state_init(so, ring-dev);
+   ret = render_state_init(so, ring-dev);
if (ret)
return ret;
 
-   if (so.rodata == NULL)
+   if (so-rodata == NULL)
return 0;
 
-   ret = render_state_setup(so);
+   ret = render_state_setup(so);
+   if (ret) {
+   i915_gem_render_state_fini(so);
+   return ret;
+   }
+
+   return 0;
+}
+
+int i915_gem_render_state_init(struct intel_engine_cs *ring)
+{
+   struct render_state so;
+   int ret;
+
+   ret = i915_gem_render_state_prepare(ring, so);
if (ret)
-   goto out;
+   return ret;
+
+   if (so.rodata == NULL)
+   return 0;
 
ret = ring-dispatch_execbuffer(ring,
so.ggtt_offset,
@@ -164,6 +174,6 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
ret = __i915_add_request(ring, NULL, so.obj, NULL);
/* __i915_add_request moves object to inactive if it fails */
 out:
-   render_state_fini(so);
+   i915_gem_render_state_fini(so);
return

[Intel-gfx] [PATCH] drm/i915/bdw: Do not initialize PPGTT in the legacy way for execlists

2014-08-20 Thread Thomas Daniel
A pending commit removes synchronous mode from switch_mm.  This breaks
execlists because switch_mm will always try to write to the legacy ring
buffer.

Return immediately from i915_ppgtt_init_gw in execlists mode.
No longer check for execlists mode in gen8_ppgtt_enable() because this
will no longer be called in execlists mode.

Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_gtt.c |   12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 22ad38b..00267b3 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -777,12 +777,6 @@ static void gen8_ppgtt_enable(struct drm_device *dev)
struct intel_engine_cs *ring;
int j;
 
-   /* In the case of execlists, PPGTT is enabled by the context descriptor
-* and the PDPs are contained within the context itself.  We don't
-* need to do anything here. */
-   if (i915.enable_execlists)
-   return;
-
for_each_ring(ring, dev_priv, j) {
I915_WRITE(RING_MODE_GEN7(ring),
   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
@@ -1126,6 +1120,12 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
struct i915_hw_ppgtt *ppgtt = dev_priv-mm.aliasing_ppgtt;
int i, ret = 0;
 
+   /* In the case of execlists, PPGTT is enabled by the context descriptor
+* and the PDPs are contained within the context itself.  We don't
+* need to do anything here. */
+   if (i915.enable_execlists)
+   return 0;
+
if (!USES_PPGTT(dev))
return 0;
 
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/bdw: Render moot context reset and switch with Execlists

2014-08-20 Thread Thomas Daniel
These two functions make no sense in an Logical Ring Context  Execlists
world.

v2: We got rid of lrc_enabled and centralized everything in the sanitized
i915.enable_execlists instead.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com

v3: Rebased.  Corrected a typo in comment for i915_switch_context and
added a comment that it should not be called in execlist mode. Added
WARN_ON if i915_switch_context is called in execlist mode. Moved check
for execlist mode out of i915_switch_context and into callers. Added
comment in context_reset explaining why nothing is done in execlist
mode.

Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c |8 +---
 drivers/gpu/drm/i915/i915_gem_context.c |   16 +++-
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cb9310b..954a5f9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2981,9 +2981,11 @@ int i915_gpu_idle(struct drm_device *dev)
 
/* Flush everything onto the inactive list. */
for_each_ring(ring, dev_priv, i) {
-   ret = i915_switch_context(ring, ring-default_context);
-   if (ret)
-   return ret;
+   if (!i915.enable_execlists) {
+   ret = i915_switch_context(ring, ring-default_context);
+   if (ret)
+   return ret;
+   }
 
ret = intel_ring_idle(ring);
if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 0fdb357..3face51 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -289,6 +289,12 @@ void i915_gem_context_reset(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev-dev_private;
int i;
 
+   /* In execlists mode we will unreference the context when the execlist
+* queue is cleared and the requests destroyed.
+*/
+   if (i915.enable_execlists)
+   return;
+
for (i = 0; i  I915_NUM_RINGS; i++) {
struct intel_engine_cs *ring = dev_priv-ring[i];
struct intel_context *lctx = ring-last_context;
@@ -397,6 +403,9 @@ int i915_gem_context_enable(struct drm_i915_private 
*dev_priv)
 
BUG_ON(!dev_priv-ring[RCS].default_context);
 
+   if (i915.enable_execlists)
+   return 0;
+
for_each_ring(ring, dev_priv, i) {
ret = i915_switch_context(ring, ring-default_context);
if (ret)
@@ -637,14 +646,19 @@ unpin_out:
  *
  * The context life cycle is simple. The context refcount is incremented and
  * decremented by 1 and create and destroy. If the context is in use by the 
GPU,
- * it will have a refoucnt  1. This allows us to destroy the context abstract
+ * it will have a refcount  1. This allows us to destroy the context abstract
  * object while letting the normal object tracking destroy the backing BO.
+ *
+ * This function should not be used in execlists mode.  Instead the context is
+ * switched by writing to the ELSP and requests keep a reference to their
+ * context.
  */
 int i915_switch_context(struct intel_engine_cs *ring,
struct intel_context *to)
 {
struct drm_i915_private *dev_priv = ring-dev-dev_private;
 
+   WARN_ON(i915.enable_execlists);
WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
 
if (to-legacy_hw_ctx.rcs_state == NULL) { /* We have the fake context 
*/
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/bdw: Populate lrc with aliasing ppgtt if required

2014-08-18 Thread Thomas Daniel
A previous commit broke aliasing PPGTT for lrc, resulting in a kernel oops
on boot. Add a check so that if full PPGTT is not in use the context is
populated with the aliasing PPGTT.

Issue: VIZ-4278
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c |7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c096b9b..79a6b91 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1452,12 +1452,19 @@ static int
 populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object 
*ctx_obj,
struct intel_engine_cs *ring, struct intel_ringbuffer 
*ringbuf)
 {
+   struct drm_device *dev = ring-dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
struct drm_i915_gem_object *ring_obj = ringbuf-obj;
struct i915_hw_ppgtt *ppgtt = ctx-ppgtt;
struct page *page;
uint32_t *reg_state;
int ret;
 
+   if (USES_FULL_PPGTT(dev))
+   ppgtt = ctx-ppgtt;
+   else
+   ppgtt = dev_priv-mm.aliasing_ppgtt;
+
ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
if (ret) {
DRM_DEBUG_DRIVER(Could not set to CPU domain\n);
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/bdw: Don't write PDP in the legacy way when using LRCs

2014-08-15 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

This is mostly for correctness so that we know we are running the LR
context correctly (this is, the PDPs are contained inside the context
object).

v2: Move the check to inside the enable PPGTT function. The switch
happens in two places: the legacy context switch (that we won't hit
when Execlists are enabled) and the PPGTT enable, which unfortunately
we need. This would look much nicer if the ppgtt-enable was part of
the ring init, where it logically belongs.

v3: Move the check to the start of the enable PPGTT function.  None
of the legacy PPGTT enabling is required when using LRCs as the
PPGTT is enabled in the context descriptor and the PDPs are written
in the LRC.

v4: Clarify comment based on review feedback.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_gtt.c |6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5188936..2966b53 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -843,6 +843,12 @@ static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
struct intel_engine_cs *ring;
int j, ret;
 
+   /* In the case of execlists, PPGTT is enabled by the context descriptor
+* and the PDPs are contained within the context itself.  We don't
+* need to do anything here. */
+   if (i915.enable_execlists)
+   return 0;
+
for_each_ring(ring, dev_priv, j) {
I915_WRITE(RING_MODE_GEN7(ring),
   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 12/43] drm/i915/bdw: Don't write PDP in the legacy way when using LRCs

2014-08-07 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

This is mostly for correctness so that we know we are running the LR
context correctly (this is, the PDPs are contained inside the context
object).

v2: Move the check to inside the enable PPGTT function. The switch
happens in two places: the legacy context switch (that we won't hit
when Execlists are enabled) and the PPGTT enable, which unfortunately
we need. This would look much nicer if the ppgtt-enable was part of
the ring init, where it logically belongs.

v3: Move the check to the start of the enable PPGTT function.  None
of the legacy PPGTT enabling is required when using LRCs as the
PPGTT is enabled in the context descriptor and the PDPs are written
in the LRC.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_gtt.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5188936..cfbf272 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -843,6 +843,11 @@ static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
struct intel_engine_cs *ring;
int j, ret;
 
+   /* In the case of Execlists, we don't want to write the PDPs
+* in the legacy way (they live inside the context now) */
+   if (i915.enable_execlists)
+   return 0;
+
for_each_ring(ring, dev_priv, j) {
I915_WRITE(RING_MODE_GEN7(ring),
   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 37/43] drm/i915/bdw: Display execlists info in debugfs

2014-08-07 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

v2: Warn and return if LRCs are not enabled.

v3: Grab the Execlists spinlock (noticed by Daniel Vetter).

Signed-off-by: Oscar Mateo oscar.ma...@intel.com

v4: Lock the struct mutex for atomic state capture

Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c |   80 +++
 drivers/gpu/drm/i915/intel_lrc.c|6 ---
 drivers/gpu/drm/i915/intel_lrc.h|7 +++
 3 files changed, 87 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index fc39610..f8f0e11 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1674,6 +1674,85 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
return 0;
 }
 
+static int i915_execlists(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = (struct drm_info_node *) m->private;
+   struct drm_device *dev = node->minor->dev;
+   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct intel_engine_cs *ring;
+   u32 status_pointer;
+   u8 read_pointer;
+   u8 write_pointer;
+   u32 status;
+   u32 ctx_id;
+   struct list_head *cursor;
+   int ring_id, i;
+   int ret;
+
+   if (!i915.enable_execlists) {
+   seq_printf(m, "Logical Ring Contexts are disabled\n");
+   return 0;
+   }
+
+   ret = mutex_lock_interruptible(dev-struct_mutex);
+   if (ret)
+   return ret;
+
+   for_each_ring(ring, dev_priv, ring_id) {
+   struct intel_ctx_submit_request *head_req = NULL;
+   int count = 0;
+   unsigned long flags;
+
+   seq_printf(m, %s\n, ring-name);
+
+   status = I915_READ(RING_EXECLIST_STATUS(ring));
+   ctx_id = I915_READ(RING_EXECLIST_STATUS(ring) + 4);
+   seq_printf(m, \tExeclist status: 0x%08X, context: %u\n,
+   status, ctx_id);
+
+   status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
+   seq_printf(m, \tStatus pointer: 0x%08X\n, status_pointer);
+
+   read_pointer = ring-next_context_status_buffer;
+   write_pointer = status_pointer  0x07;
+   if (read_pointer  write_pointer)
+   write_pointer += 6;
+   seq_printf(m, \tRead pointer: 0x%08X, write pointer 0x%08X\n,
+   read_pointer, write_pointer);
+
+   for (i = 0; i  6; i++) {
+   status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + 8*i);
+   ctx_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + 8*i 
+ 4);
+
+   seq_printf(m, \tStatus buffer %d: 0x%08X, context: 
%u\n,
+   i, status, ctx_id);
+   }
+
+   spin_lock_irqsave(ring-execlist_lock, flags);
+   list_for_each(cursor, ring-execlist_queue)
+   count++;
+   head_req = list_first_entry_or_null(ring-execlist_queue,
+   struct intel_ctx_submit_request, execlist_link);
+   spin_unlock_irqrestore(ring-execlist_lock, flags);
+
+   seq_printf(m, \t%d requests in queue\n, count);
+   if (head_req) {
+   struct drm_i915_gem_object *ctx_obj;
+
+   ctx_obj = head_req-ctx-engine[ring_id].state;
+   seq_printf(m, \tHead request id: %u\n,
+   intel_execlists_ctx_id(ctx_obj));
+   seq_printf(m, \tHead request tail: %u\n, 
head_req-tail);
+   }
+
+   seq_putc(m, '\n');
+   }
+
+   mutex_unlock(dev-struct_mutex);
+
+   return 0;
+}
+
 static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data)
 {
struct drm_info_node *node = m-private;
@@ -3899,6 +3978,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{i915_opregion, i915_opregion, 0},
{i915_gem_framebuffer, i915_gem_framebuffer_info, 0},
{i915_context_status, i915_context_status, 0},
+   {i915_execlists, i915_execlists, 0},
{i915_gen6_forcewake_count, i915_gen6_forcewake_count_info, 0},
{i915_swizzle_info, i915_swizzle_info, 0},
{i915_ppgtt_info, i915_ppgtt_info, 0},
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 829b15d..8056fa4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -46,12 +46,6 @@
 
 #define GEN8_LR_CONTEXT_ALIGN 4096
 
-#define RING_ELSP(ring)((ring)->mmio_base+0x230)
-#define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234)
-#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244)
-#define RING_CONTEXT_STATUS_BUF(ring)  ((ring)-mmio_base+0x370

[Intel-gfx] [PATCH 39/43] drm/i915/bdw: Print context state in debugfs

2014-08-07 Thread Thomas Daniel
From: Ben Widawsky b...@bwidawsk.net

This has turned out to be really handy in debug so far.

Update:
Since writing this patch, I've gotten similar code upstream for error
state. I've used it quite a bit in debugfs however, and I'd like to keep
it here at least until preemption is working.

Signed-off-by: Ben Widawsky b...@bwidawsk.net

This patch was accidentally dropped in the first Execlists version, and
it has been very useful indeed. Put it back again, but as a standalone
debugfs file.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com

v2: Take the device struct_mutex rather than mode_config mutex for
atomic state capture.

Signed-off-by: Thomas Daniel thomas.dan...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c |   52 +++
 1 file changed, 52 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index aca5ff1..a3c958c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1695,6 +1695,57 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
return 0;
 }
 
+static int i915_dump_lrc(struct seq_file *m, void *unused)
+{
+   struct drm_info_node *node = (struct drm_info_node *) m->private;
+   struct drm_device *dev = node->minor->dev;
+   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct intel_engine_cs *ring;
+   struct intel_context *ctx;
+   int ret, i;
+
+   if (!i915.enable_execlists) {
+   seq_printf(m, Logical Ring Contexts are disabled\n);
+   return 0;
+   }
+
+   ret = mutex_lock_interruptible(dev-struct_mutex);
+   if (ret)
+   return ret;
+
+   list_for_each_entry(ctx, dev_priv-context_list, link) {
+   for_each_ring(ring, dev_priv, i) {
+   struct drm_i915_gem_object *ctx_obj = 
ctx-engine[i].state;
+
+   if (ring-default_context == ctx)
+   continue;
+
+   if (ctx_obj) {
+   struct page *page = 
i915_gem_object_get_page(ctx_obj, 1);
+   uint32_t *reg_state = kmap_atomic(page);
+   int j;
+
+   seq_printf(m, CONTEXT: %s %u\n, ring-name,
+   
intel_execlists_ctx_id(ctx_obj));
+
+   for (j = 0; j  0x600 / sizeof(u32) / 4; j += 
4) {
+   seq_printf(m, \t[0x%08lx] 0x%08x 
0x%08x 0x%08x 0x%08x\n,
+   i915_gem_obj_ggtt_offset(ctx_obj) + 
4096 + (j * 4),
+   reg_state[j], reg_state[j + 1],
+   reg_state[j + 2], reg_state[j + 3]);
+   }
+   kunmap_atomic(reg_state);
+
+   seq_putc(m, '\n');
+   }
+   }
+   }
+
+   mutex_unlock(dev-struct_mutex);
+
+   return 0;
+}
+
 static int i915_execlists(struct seq_file *m, void *data)
 {
struct drm_info_node *node = (struct drm_info_node *) m-private;
@@ -3999,6 +4050,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{i915_opregion, i915_opregion, 0},
{i915_gem_framebuffer, i915_gem_framebuffer_info, 0},
{i915_context_status, i915_context_status, 0},
+   {i915_dump_lrc, i915_dump_lrc, 0},
{i915_execlists, i915_execlists, 0},
{i915_gen6_forcewake_count, i915_gen6_forcewake_count_info, 0},
{i915_swizzle_info, i915_swizzle_info, 0},
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 09/43] drm/i915/bdw: Populate LR contexts (somewhat)

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

For the most part, logical ring context objects are similar to hardware
contexts in that the backing object is meant to be opaque. There are
some exceptions where we need to poke certain offsets of the object for
initialization, updating the tail pointer or updating the PDPs.

For our basic execlist implementation we'll only need our PPGTT PDs,
and ringbuffer addresses in order to set up the context. With previous
patches, we have both, so start prepping the context to be load.

Before running a context for the first time you must populate some
fields in the context object. These fields begin 1 PAGE + LRCA, ie. the
first page (in 0 based counting) of the context  image. These same
fields will be read and written to as contexts are saved and restored
once the system is up and running.

Many of these fields are completely reused from previous global
registers: ringbuffer head/tail/control, context control matches some
previous MI_SET_CONTEXT flags, and page directories. There are other
fields which we don't touch which we may want in the future.

v2: CTX_LRI_HEADER_0 is MI_LOAD_REGISTER_IMM(14) for render and (11)
for other engines.

v3: Several rebases and general changes to the code.

v4: Squash with Extract LR context object populating
Also, Damien's review comments:
- Set the Force Posted bit on the LRI header, as the BSpec suggest we do.
- Prevent warning when compiling a 32-bits kernel without HIGHMEM64.
- Add a clarifying comment to the context population code.

v5: Damien's review comments:
- The third MI_LOAD_REGISTER_IMM in the context does not set Force Posted.
- Remove dead code.

v6: Add a note about the (presumed) differences between BDW and CHV state
contexts. Also, Brad's review comments:
- Use the _MASKED_BIT_ENABLE, upper_32_bits and lower_32_bits macros.
- Be less magical about how we set the ring size in the context.

Signed-off-by: Ben Widawsky b...@bwidawsk.net (v1)
Signed-off-by: Rafael Barbalho rafael.barba...@intel.com (v2)
Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_reg.h  |1 +
 drivers/gpu/drm/i915/intel_lrc.c |  159 +-
 2 files changed, 156 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ce70aa4..043a6ea 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -282,6 +282,7 @@
  *   address/value pairs. Don't overdue it, though, x = 2^4 must hold!
  */
 #define MI_LOAD_REGISTER_IMM(x)MI_INSTR(0x22, 2*(x)-1)
+#define   MI_LRI_FORCE_POSTED  (1<<12)
 #define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*(x)-1)
 #define MI_STORE_REGISTER_MEM_GEN8(x) MI_INSTR(0x24, 3*(x)-1)
 #define   MI_SRM_LRM_GLOBAL_GTT(1<<22)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 2eb7db6..cf322ec 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -46,6 +46,38 @@
 
 #define GEN8_LR_CONTEXT_ALIGN 4096
 
+#define RING_ELSP(ring)((ring)->mmio_base+0x230)
+#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244)
+
+#define CTX_LRI_HEADER_0   0x01
+#define CTX_CONTEXT_CONTROL0x02
+#define CTX_RING_HEAD  0x04
+#define CTX_RING_TAIL  0x06
+#define CTX_RING_BUFFER_START  0x08
+#define CTX_RING_BUFFER_CONTROL0x0a
+#define CTX_BB_HEAD_U  0x0c
+#define CTX_BB_HEAD_L  0x0e
+#define CTX_BB_STATE   0x10
+#define CTX_SECOND_BB_HEAD_U   0x12
+#define CTX_SECOND_BB_HEAD_L   0x14
+#define CTX_SECOND_BB_STATE0x16
+#define CTX_BB_PER_CTX_PTR 0x18
+#define CTX_RCS_INDIRECT_CTX   0x1a
+#define CTX_RCS_INDIRECT_CTX_OFFSET0x1c
+#define CTX_LRI_HEADER_1   0x21
+#define CTX_CTX_TIMESTAMP  0x22
+#define CTX_PDP3_UDW   0x24
+#define CTX_PDP3_LDW   0x26
+#define CTX_PDP2_UDW   0x28
+#define CTX_PDP2_LDW   0x2a
+#define CTX_PDP1_UDW   0x2c
+#define CTX_PDP1_LDW   0x2e
+#define CTX_PDP0_UDW   0x30
+#define CTX_PDP0_LDW   0x32
+#define CTX_LRI_HEADER_2   0x41
+#define CTX_R_PWR_CLK_STATE0x42
+#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44
+
 int intel_sanitize_enable_execlists(struct drm_device *dev, int 
enable_execlists)
 {
if (enable_execlists == 0)
@@ -57,6 +89,115 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, 
int enable_execlists
return 0;
 }
 
+static int
+populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object 
*ctx_obj,
+   struct intel_engine_cs *ring, struct intel_ringbuffer 
*ringbuf)
+{
+   struct drm_i915_gem_object *ring_obj = ringbuf-obj;
+   struct i915_hw_ppgtt 

[Intel-gfx] [PATCH 08/43] drm/i915/bdw: Add a context and an engine pointers to the ringbuffer

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Any given ringbuffer is unequivocally tied to one context and one engine.
By setting the appropriate pointers to them, the ringbuffer struct holds
all the infromation you might need to submit a workload for processing,
Execlists style.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c|2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c |2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.h |3 +++
 3 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0a12b8c..2eb7db6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -132,6 +132,8 @@ int intel_lr_context_deferred_create(struct intel_context 
*ctx,
return ret;
}
 
+   ringbuf-ring = ring;
+   ringbuf-ctx = ctx;
ringbuf-size = 32 * PAGE_SIZE;
ringbuf-effective_size = ringbuf-size;
ringbuf-head = 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 01e9840..279dda4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1570,6 +1570,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
INIT_LIST_HEAD(ring-active_list);
INIT_LIST_HEAD(ring-request_list);
ringbuf-size = 32 * PAGE_SIZE;
+   ringbuf-ring = ring;
+   ringbuf-ctx = ring-default_context;
memset(ring-semaphore.sync_seqno, 0, 
sizeof(ring-semaphore.sync_seqno));
 
init_waitqueue_head(ring-irq_queue);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 053d004..be40788 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -88,6 +88,9 @@ struct intel_ringbuffer {
struct drm_i915_gem_object *obj;
void __iomem *virtual_start;
 
+   struct intel_engine_cs *ring;
+   struct intel_context *ctx;
+
u32 head;
u32 tail;
int space;
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 00/43] Execlists v5

2014-07-24 Thread Thomas Daniel
From: Thomas Daniel thomas.dan...@intel.com

For a description of this patchset, please check the previous cover letters: 
[1], [2], [3] and [4].

I have taken ownership of this patchset from Oscar, and this version represents 
his last work on the execlists patchset.  The narrative below is from him.

I have been given some grace period to fix the remaining issues in Execlists 
before I move to a different project, and this is the result. There are very 
little differences between this v5 and the v4 I sent out last week, so I was 
unsure whether to drop a new patchbomb or simply reply to the patches that have 
changed, but I decided for the former to make the review easier.

The changes are:

- New prep-work patch to prevent a potential problem with the legacy ringbuffer 
submission extraction that was done earlier.
- Do the remaining intel_runtime_put while purging the execlists queue during 
reset.
- Check arguments before doing stuff in intel_execlists_submission. Also, get 
rel_constants parsing right.
- Do gen8_emit_flush = gen6_ring_flush + gen6_bsd_ring_flush.
- New patches for pinning context and ringbuffer backing objects on-demand 
(before I was pinning on interrupt time, which was a no-no). These fix the 
reamining eviction issues I was seeing.

The previous comment about the WAs still applies. I reproduce it here for 
completeness:

One other caveat I have noticed is that many WAs in gen8_init_clock_gating 
(those that affect registers that now exist per-context) can get lost in the 
render default context. The reason is, in Execlists, a context is saved as soon 
as head = tail (with MI_SET_CONTEXT, however, the context wouldn't be saved 
until you tried to restore a different context). As we are sending the golden 
state batchbuffer to the render ring as soon as the rings are initialized, we 
are effectively saving the default context before gen8_init_clock_gating has an 
opportunity to set the WAs. I haven't noticed any ill-effect from this (yet) 
but it would be a good idea to move the WAs somewhere else (ring init looks 
like a good place). I believe there is already work in progress to create a new 
WA architecture, so this can be tackled there.

The previous IGT test [4] still applies.

There are three pending issues:

- The test gem_close_race warns about scheduling while atomic when the 
shrinker gets called. Without Execlists, the shrinker does not get called at 
all (which kind of makes sense) but the tests timeouts before finishing.
- The test gem_concurrent_blit fails in the gtt-* subtests: some pixels (14, to 
be exact) do not get copied correctly from one bo to another. Funnily enough, 
the tests pass if I do a i915 module reload first (./tests/drv_module_reload). 
Yesterday I dumped all the registers in the chip before and after a module 
reload (attached), but I haven't found any meaningful difference yet. 
- When I try to run a whole IGT suite using Piglit, sometimes I hit the 
BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0)) in execlists_submit_context(). I 
haven't managed to reproduce the problem at will, but there is obviously 
something wrong with the last two Execlists patches.

Keep the r-b tags coming, please!!

-- Oscar

[1]
http://lists.freedesktop.org/archives/intel-gfx/2014-March/042563.html
[2]
http://lists.freedesktop.org/archives/intel-gfx/2014-May/044847.html
[3]
http://lists.freedesktop.org/archives/intel-gfx/2014-June/047138.html
[4]
http://lists.freedesktop.org/archives/intel-gfx/2014-July/048944.html
[5]
http://lists.freedesktop.org/archives/intel-gfx/2014-May/044846.html

Ben Widawsky (2):
  drm/i915/bdw: Implement context switching (somewhat)
  drm/i915/bdw: Print context state in debugfs

Michel Thierry (1):
  drm/i915/bdw: Two-stage execlist submit process

Oscar Mateo (39):
  drm/i915: Reorder the actual workload submission so that args checking
is done earlier
  drm/i915/bdw: New source and header file for LRs, LRCs and Execlists
  drm/i915/bdw: Macro for LRCs and module option for Execlists
  drm/i915/bdw: Initialization for Logical Ring Contexts
  drm/i915/bdw: Introduce one context backing object per engine
  drm/i915/bdw: A bit more advanced LR context alloc/free
  drm/i915/bdw: Allocate ringbuffers for Logical Ring Contexts
  drm/i915/bdw: Add a context and an engine pointers to the ringbuffer
  drm/i915/bdw: Populate LR contexts (somewhat)
  drm/i915/bdw: Deferred creation of user-created LRCs
  drm/i915/bdw: Render moot context reset and switch with Execlists
  drm/i915/bdw: Don't write PDP in the legacy way when using LRCs
  drm/i915: Abstract the legacy workload submission mechanism away
  drm/i915/bdw: Skeleton for the new logical rings submission path
  drm/i915/bdw: Generic logical ring init and cleanup
  drm/i915/bdw: GEN-specific logical ring init
  drm/i915/bdw: GEN-specific logical ring set/get seqno
  drm/i915/bdw: New logical ring submission mechanism
  drm/i915/bdw: GEN-specific logical ring emit request
  drm/i915/bdw: GEN-specific

[Intel-gfx] [PATCH 04/43] drm/i915/bdw: Initialization for Logical Ring Contexts

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

For the moment this is just a placeholder, but it shows one of the
main differences between the good ol' HW contexts and the shiny
new Logical Ring Contexts: LR contexts allocate  and free their
own backing objects. Another difference is that the allocation is
deferred (as the create function name suggests), but that does not
happen in this patch yet, because for the moment we are only dealing
with the default context.

Early in the series we had our own gen8_gem_context_init/fini
functions, but the truth is they now look almost the same as the
legacy hw context init/fini functions. We can always split them
later if this ceases to be the case.

Also, we do not fall back to legacy ringbuffers when logical ring
context initialization fails (not very likely to happen and, even
if it does, hw contexts would probably fail as well).

v2: Daniel says explain, do not showcase.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_context.c |   29 +++--
 drivers/gpu/drm/i915/intel_lrc.c|   15 +++
 drivers/gpu/drm/i915/intel_lrc.h|5 +
 3 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index de72a28..718150e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -182,7 +182,10 @@ void i915_gem_context_free(struct kref *ctx_ref)
   typeof(*ctx), ref);
struct i915_hw_ppgtt *ppgtt = NULL;
 
-   if (ctx->legacy_hw_ctx.rcs_state) {
+   if (i915.enable_execlists) {
+   ppgtt = ctx_to_ppgtt(ctx);
+   intel_lr_context_free(ctx);
+   } else if (ctx->legacy_hw_ctx.rcs_state) {
		/* We refcount even the aliasing PPGTT to keep the code symmetric */
		if (USES_PPGTT(ctx->legacy_hw_ctx.rcs_state->base.dev))
			ppgtt = ctx_to_ppgtt(ctx);
@@ -419,7 +422,11 @@ int i915_gem_context_init(struct drm_device *dev)
if (WARN_ON(dev_priv-ring[RCS].default_context))
return 0;
 
-   if (HAS_HW_CONTEXTS(dev)) {
+   if (i915.enable_execlists) {
+   /* NB: intentionally left blank. We will allocate our own
+* backing objects as we need them, thank you very much */
+   dev_priv-hw_context_size = 0;
+   } else if (HAS_HW_CONTEXTS(dev)) {
		dev_priv->hw_context_size = round_up(get_context_size(dev), 4096);
		if (dev_priv->hw_context_size > (1<<20)) {
			DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n",
@@ -435,11 +442,20 @@ int i915_gem_context_init(struct drm_device *dev)
return PTR_ERR(ctx);
}
 
-   /* NB: RCS will hold a ref for all rings */
-   for (i = 0; i  I915_NUM_RINGS; i++)
-   dev_priv-ring[i].default_context = ctx;
+   for (i = 0; i  I915_NUM_RINGS; i++) {
+   struct intel_engine_cs *ring = dev_priv-ring[i];
+
+   /* NB: RCS will hold a ref for all rings */
+   ring-default_context = ctx;
+
+   /* FIXME: we really only want to do this for initialized rings 
*/
+   if (i915.enable_execlists)
+   intel_lr_context_deferred_create(ctx, ring);
+   }
 
-   DRM_DEBUG_DRIVER(%s context support initialized\n, 
dev_priv-hw_context_size ? HW : fake);
+   DRM_DEBUG_DRIVER(%s context support initialized\n,
+   i915.enable_execlists ? LR :
+   dev_priv-hw_context_size ? HW : fake);
return 0;
 }
 
@@ -781,6 +797,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, 
void *data,
struct intel_context *ctx;
int ret;
 
+   /* FIXME: allow user-created LR contexts as well */
if (!hw_context_enabled(dev))
return -ENODEV;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 21f7f1c..8cc6b55 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -51,3 +51,18 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, 
int enable_execlists
 
return 0;
 }
+
+void intel_lr_context_free(struct intel_context *ctx)
+{
+   /* TODO */
+}
+
+int intel_lr_context_deferred_create(struct intel_context *ctx,
+struct intel_engine_cs *ring)
+{
+   BUG_ON(ctx-legacy_hw_ctx.rcs_state != NULL);
+
+   /* TODO */
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 75ee9c3..3b93572 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -24,6 +24,11 @@
 #ifndef _INTEL_LRC_H_
 #define _INTEL_LRC_H_
 
+/* Logical Ring Contexts */
+void intel_lr_context_free(struct intel_context *ctx);
+int 

[Intel-gfx] [PATCH 03/43] drm/i915/bdw: Macro for LRCs and module option for Execlists

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

GEN8 brings an expansion of the HW contexts: Logical Ring Contexts.
These expanded contexts enable a number of new abilities, especially
Execlists.

The macro is defined to off until we have things in place to hope to
work.

v2: Rename advanced contexts to the more correct logical ring
contexts.

v3: Add a module parameter to enable execlists. Execlist are relatively
new, and so it'd be wise to be able to switch back to ring submission
to debug subtle problems that will inevitably arise.

v4: Add an intel_enable_execlists function.

v5: Sanitize early, as suggested by Daniel. Remove lrc_enabled.

Signed-off-by: Ben Widawsky b...@bwidawsk.net (v1)
Signed-off-by: Damien Lespiau damien.lesp...@intel.com (v3)
Signed-off-by: Oscar Mateo oscar.ma...@intel.com (v2, v4  v5)
---
 drivers/gpu/drm/i915/i915_drv.h|2 ++
 drivers/gpu/drm/i915/i915_gem.c|3 +++
 drivers/gpu/drm/i915/i915_params.c |6 ++
 drivers/gpu/drm/i915/intel_lrc.c   |   11 +++
 drivers/gpu/drm/i915/intel_lrc.h   |3 +++
 5 files changed, 25 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 54c2bd9..a793d6d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2037,6 +2037,7 @@ struct drm_i915_cmd_table {
 #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)-need_gfx_hws)
 
 #define HAS_HW_CONTEXTS(dev)   (INTEL_INFO(dev)-gen = 6)
+#define HAS_LOGICAL_RING_CONTEXTS(dev) 0
 #define HAS_ALIASING_PPGTT(dev)(INTEL_INFO(dev)-gen = 6)
 #define HAS_PPGTT(dev) (INTEL_INFO(dev)-gen = 7  !IS_GEN8(dev))
 #define USES_PPGTT(dev)intel_enable_ppgtt(dev, false)
@@ -2122,6 +2123,7 @@ struct i915_params {
int enable_rc6;
int enable_fbc;
int enable_ppgtt;
+   int enable_execlists;
int enable_psr;
unsigned int preliminary_hw_support;
int disable_power_well;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e5d4d73..d8bf4fa 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4746,6 +4746,9 @@ int i915_gem_init(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev-dev_private;
int ret;
 
+   i915.enable_execlists = intel_sanitize_enable_execlists(dev,
+   i915.enable_execlists);
+
mutex_lock(dev-struct_mutex);
 
if (IS_VALLEYVIEW(dev)) {
diff --git a/drivers/gpu/drm/i915/i915_params.c 
b/drivers/gpu/drm/i915/i915_params.c
index bbdee21..7f0fb72 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -35,6 +35,7 @@ struct i915_params i915 __read_mostly = {
.vbt_sdvo_panel_type = -1,
.enable_rc6 = -1,
.enable_fbc = -1,
+   .enable_execlists = -1,
.enable_hangcheck = true,
.enable_ppgtt = -1,
.enable_psr = 1,
@@ -117,6 +118,11 @@ MODULE_PARM_DESC(enable_ppgtt,
Override PPGTT usage. 
(-1=auto [default], 0=disabled, 1=aliasing, 2=full));
 
+module_param_named(enable_execlists, i915.enable_execlists, int, 0400);
+MODULE_PARM_DESC(enable_execlists,
+   Override execlists usage. 
+   (-1=auto [default], 0=disabled, 1=enabled));
+
 module_param_named(enable_psr, i915.enable_psr, int, 0600);
 MODULE_PARM_DESC(enable_psr, Enable PSR (default: true));
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 49bb6fc..21f7f1c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -40,3 +40,14 @@
 #include drm/drmP.h
 #include drm/i915_drm.h
 #include i915_drv.h
+
+int intel_sanitize_enable_execlists(struct drm_device *dev, int 
enable_execlists)
+{
+   if (enable_execlists == 0)
+   return 0;
+
+   if (HAS_LOGICAL_RING_CONTEXTS(dev)  USES_PPGTT(dev))
+   return 1;
+
+   return 0;
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f6830a4..75ee9c3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -24,4 +24,7 @@
 #ifndef _INTEL_LRC_H_
 #define _INTEL_LRC_H_
 
+/* Execlists */
+int intel_sanitize_enable_execlists(struct drm_device *dev, int 
enable_execlists);
+
 #endif /* _INTEL_LRC_H_ */
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 01/43] drm/i915: Reorder the actual workload submission so that args checking is done earlier

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

In this patch:

commit 78382593e921c88371abd019aca8978db3248a8f
Author: Oscar Mateo oscar.ma...@intel.com
Date:   Thu Jul 3 16:28:05 2014 +0100

drm/i915: Extract the actual workload submission mechanism from execbuffer

So that we isolate the legacy ringbuffer submission mechanism, which becomes
a good candidate to be abstracted away. This is prep-work for Execlists 
(which
will its own workload submission mechanism).

No functional changes.

I changed the order in which the args checking is done. I don't know why I did 
(brain
fade?) but it's not right. I haven't seen any ill effect from this, but the 
Execlists
version of this function will have problems if the order is not correct.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   86 ++--
 1 file changed, 43 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 60998fc..c5115957 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1042,6 +1042,43 @@ legacy_ringbuffer_submission(struct drm_device *dev, 
struct drm_file *file,
u32 instp_mask;
int i, ret = 0;
 
+   instp_mode = args-flags  I915_EXEC_CONSTANTS_MASK;
+   instp_mask = I915_EXEC_CONSTANTS_MASK;
+   switch (instp_mode) {
+   case I915_EXEC_CONSTANTS_REL_GENERAL:
+   case I915_EXEC_CONSTANTS_ABSOLUTE:
+   case I915_EXEC_CONSTANTS_REL_SURFACE:
+   if (instp_mode != 0  ring != dev_priv-ring[RCS]) {
+   DRM_DEBUG(non-0 rel constants mode on non-RCS\n);
+   ret = -EINVAL;
+   goto error;
+   }
+
+   if (instp_mode != dev_priv-relative_constants_mode) {
+   if (INTEL_INFO(dev)-gen  4) {
+   DRM_DEBUG(no rel constants on pre-gen4\n);
+   ret = -EINVAL;
+   goto error;
+   }
+
+   if (INTEL_INFO(dev)-gen  5 
+   instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
+   DRM_DEBUG(rel surface constants mode invalid 
on gen5+\n);
+   ret = -EINVAL;
+   goto error;
+   }
+
+   /* The HW changed the meaning on this bit on gen6 */
+   if (INTEL_INFO(dev)-gen = 6)
+   instp_mask = ~I915_EXEC_CONSTANTS_REL_SURFACE;
+   }
+   break;
+   default:
+   DRM_DEBUG(execbuf with unknown constants: %d\n, instp_mode);
+   ret = -EINVAL;
+   goto error;
+   }
+
if (args-num_cliprects != 0) {
if (ring != dev_priv-ring[RCS]) {
DRM_DEBUG(clip rectangles are only valid with the 
render ring\n);
@@ -1085,6 +1122,12 @@ legacy_ringbuffer_submission(struct drm_device *dev, 
struct drm_file *file,
}
}
 
+   if (args-flags  I915_EXEC_GEN7_SOL_RESET) {
+   ret = i915_reset_gen7_sol_offsets(dev, ring);
+   if (ret)
+   goto error;
+   }
+
ret = i915_gem_execbuffer_move_to_gpu(ring, vmas);
if (ret)
goto error;
@@ -1093,43 +1136,6 @@ legacy_ringbuffer_submission(struct drm_device *dev, 
struct drm_file *file,
if (ret)
goto error;
 
-   instp_mode = args-flags  I915_EXEC_CONSTANTS_MASK;
-   instp_mask = I915_EXEC_CONSTANTS_MASK;
-   switch (instp_mode) {
-   case I915_EXEC_CONSTANTS_REL_GENERAL:
-   case I915_EXEC_CONSTANTS_ABSOLUTE:
-   case I915_EXEC_CONSTANTS_REL_SURFACE:
-   if (instp_mode != 0  ring != dev_priv-ring[RCS]) {
-   DRM_DEBUG(non-0 rel constants mode on non-RCS\n);
-   ret = -EINVAL;
-   goto error;
-   }
-
-   if (instp_mode != dev_priv-relative_constants_mode) {
-   if (INTEL_INFO(dev)-gen  4) {
-   DRM_DEBUG(no rel constants on pre-gen4\n);
-   ret = -EINVAL;
-   goto error;
-   }
-
-   if (INTEL_INFO(dev)-gen  5 
-   instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
-   DRM_DEBUG(rel surface constants mode invalid 
on gen5+\n);
-   ret = -EINVAL;
-   goto error;
-   }
-
-   /* The HW changed the meaning on this bit on gen6 */
-   if (INTEL_INFO(dev)-gen = 6)
-   instp_mask = 

[Intel-gfx] [PATCH 10/43] drm/i915/bdw: Deferred creation of user-created LRCs

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

The backing objects and ringbuffers for contexts created via open
fd are actually empty until the user starts sending execbuffers to
them. At that point, we allocate  populate them. We do this because,
at create time, we really don't know which engine is going to be used
with the context later on (and we don't want to waste memory on
objects that we might never use).

v2: As contexts created via ioctl can only be used with the render
ring, we have enough information to allocate  populate them right
away.

v3: Defer the creation always, even with ioctl-created contexts, as
requested by Daniel Vetter.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_context.c|7 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |8 
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 48d7476..fbe7278 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -784,9 +784,9 @@ int i915_switch_context(struct intel_engine_cs *ring,
return do_switch(ring, to);
 }
 
-static bool hw_context_enabled(struct drm_device *dev)
+static bool contexts_enabled(struct drm_device *dev)
 {
-   return to_i915(dev)-hw_context_size;
+   return i915.enable_execlists || to_i915(dev)-hw_context_size;
 }
 
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
@@ -797,8 +797,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, 
void *data,
struct intel_context *ctx;
int ret;
 
-   /* FIXME: allow user-created LR contexts as well */
-   if (!hw_context_enabled(dev))
+   if (!contexts_enabled(dev))
return -ENODEV;
 
ret = i915_mutex_lock_interruptible(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c5115957..4e9b387 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -951,6 +951,14 @@ i915_gem_validate_context(struct drm_device *dev, struct 
drm_file *file,
return ERR_PTR(-EIO);
}
 
+   if (i915.enable_execlists  !ctx-engine[ring-id].state) {
+   int ret = intel_lr_context_deferred_create(ctx, ring);
+   if (ret) {
+   DRM_DEBUG(Could not create LRC %u: %d\n, ctx_id, ret);
+   return ERR_PTR(ret);
+   }
+   }
+
return ctx;
 }
 
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 05/43] drm/i915/bdw: Introduce one context backing object per engine

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

A context backing object only makes sense for a given engine (because
it holds state data specific to that engine).

In legacy ringbuffer submission mode, the only MI_SET_CONTEXT we really
perform is for the render engine, so one backing object is all we need.

With Execlists, however, we need backing objects for every engine, as
contexts become the only way to submit workloads to the GPU. To tackle
this problem, we multiplex the context struct to contain no-of-engines
objects.

Originally, I colored this code by instantiating one new context for
every engine I wanted to use, but this change suggested by Brad Volkin
makes it more elegant.

v2: Leave the old backing object pointer behind. Daniel Vetter suggested
using a union, but it makes more sense to keep rcs_state as a NULL
pointer behind, to make sure no one uses it incorrectly when Execlists
are enabled, similar to what he suggested for ring-buffer (Rusty's API
level 5).

v3: Use the name state instead of the too-generic obj, so that it
mirrors the name choice for the legacy rcs_state.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h |6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a793d6d..b2b0c80 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -616,11 +616,17 @@ struct intel_context {
struct i915_ctx_hang_stats hang_stats;
struct i915_address_space *vm;
 
+   /* Legacy ring buffer submission */
struct {
struct drm_i915_gem_object *rcs_state;
bool initialized;
} legacy_hw_ctx;
 
+   /* Execlists */
+   struct {
+   struct drm_i915_gem_object *state;
+   } engine[I915_NUM_RINGS];
+
struct list_head link;
 };
 
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 11/43] drm/i915/bdw: Render moot context reset and switch with Execlists

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

These two functions make no sense in a Logical Ring Context & Execlists
world.

v2: We got rid of lrc_enabled and centralized everything in the sanitized
i915.enable_execlists instead.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_context.c |9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index fbe7278..288f5de 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -380,6 +380,9 @@ void i915_gem_context_reset(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev-dev_private;
int i;
 
+   if (i915.enable_execlists)
+   return;
+
/* Prevent the hardware from restoring the last context (which hung) on
 * the next switch */
for (i = 0; i  I915_NUM_RINGS; i++) {
@@ -514,6 +517,9 @@ int i915_gem_context_enable(struct drm_i915_private 
*dev_priv)
ppgtt-enable(ppgtt);
}
 
+   if (i915.enable_execlists)
+   return 0;
+
/* FIXME: We should make this work, even in reset */
if (i915_reset_in_progress(dev_priv-gpu_error))
return 0;
@@ -769,6 +775,9 @@ int i915_switch_context(struct intel_engine_cs *ring,
 {
struct drm_i915_private *dev_priv = ring-dev-dev_private;
 
+   if (i915.enable_execlists)
+   return 0;
+
WARN_ON(!mutex_is_locked(dev_priv-dev-struct_mutex));
 
if (to-legacy_hw_ctx.rcs_state == NULL) { /* We have the fake context 
*/
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 15/43] drm/i915/bdw: Generic logical ring init and cleanup

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Allocate and populate the default LRC for every ring, call
gen-specific init/cleanup, init/fini the command parser and
set the status page (now inside the LRC object). These are
things all engines/rings have in common.

Stopping the ring before cleanup and initializing the seqnos
is left as a TODO task (we need more infrastructure in place
before we can achieve this).

v2: Check the ringbuffer backing obj for ring_is_initialized,
instead of the context backing obj (similar, but not exactly
the same).

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_context.c |4 ---
 drivers/gpu/drm/i915/intel_lrc.c|   54 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c |   17 ++
 drivers/gpu/drm/i915/intel_ringbuffer.h |6 +---
 4 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 288f5de..9085ff1 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -450,10 +450,6 @@ int i915_gem_context_init(struct drm_device *dev)
 
/* NB: RCS will hold a ref for all rings */
ring-default_context = ctx;
-
-   /* FIXME: we really only want to do this for initialized rings 
*/
-   if (i915.enable_execlists)
-   intel_lr_context_deferred_create(ctx, ring);
}
 
DRM_DEBUG_DRIVER(%s context support initialized\n,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cb56bb8..05b7069 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -108,12 +108,60 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
 
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 {
-   /* TODO */
+   if (!intel_ring_initialized(ring))
+   return;
+
+   /* TODO: make sure the ring is stopped */
+   ring-preallocated_lazy_request = NULL;
+   ring-outstanding_lazy_seqno = 0;
+
+   if (ring-cleanup)
+   ring-cleanup(ring);
+
+   i915_cmd_parser_fini_ring(ring);
+
+   if (ring-status_page.obj) {
+   kunmap(sg_page(ring-status_page.obj-pages-sgl));
+   ring-status_page.obj = NULL;
+   }
 }
 
 static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs 
*ring)
 {
-   /* TODO */
+   int ret;
+   struct intel_context *dctx = ring-default_context;
+   struct drm_i915_gem_object *dctx_obj;
+
+   /* Intentionally left blank. */
+   ring-buffer = NULL;
+
+   ring-dev = dev;
+   INIT_LIST_HEAD(ring-active_list);
+   INIT_LIST_HEAD(ring-request_list);
+   init_waitqueue_head(ring-irq_queue);
+
+   ret = intel_lr_context_deferred_create(dctx, ring);
+   if (ret)
+   return ret;
+
+   /* The status page is offset 0 from the context object in LRCs. */
+   dctx_obj = dctx-engine[ring-id].state;
+   ring-status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj);
+   ring-status_page.page_addr = kmap(sg_page(dctx_obj-pages-sgl));
+   if (ring-status_page.page_addr == NULL)
+   return -ENOMEM;
+   ring-status_page.obj = dctx_obj;
+
+   ret = i915_cmd_parser_init_ring(ring);
+   if (ret)
+   return ret;
+
+   if (ring-init) {
+   ret = ring-init(ring);
+   if (ret)
+   return ret;
+   }
+
return 0;
 }
 
@@ -397,6 +445,8 @@ int intel_lr_context_deferred_create(struct intel_context 
*ctx,
int ret;
 
BUG_ON(ctx-legacy_hw_ctx.rcs_state != NULL);
+   if (ctx-engine[ring-id].state)
+   return 0;
 
context_size = round_up(get_lr_context_size(ring), 4096);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 279dda4..20eb1a4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -40,6 +40,23 @@
  */
 #define CACHELINE_BYTES 64
 
+bool
+intel_ring_initialized(struct intel_engine_cs *ring)
+{
+   struct drm_device *dev = ring-dev;
+
+   if (!dev)
+   return false;
+
+   if (i915.enable_execlists) {
+   struct intel_context *dctx = ring-default_context;
+   struct intel_ringbuffer *ringbuf = 
dctx-engine[ring-id].ringbuf;
+
+   return ringbuf-obj;
+   } else
+   return ring-buffer  ring-buffer-obj;
+}
+
 static inline int __ring_space(int head, int tail, int size)
 {
int space = head - (tail + I915_RING_FREE_SPACE);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index be40788..7203ee2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -288,11 +288,7 @@ struct  

[Intel-gfx] [PATCH 21/43] drm/i915/bdw: Emission of requests with logical rings

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

On a previous iteration of this patch, I created an Execlists
version of __i915_add_request and abstracted it away as a
vfunc. Daniel Vetter wondered then why that was needed:

with the clean split in command submission I expect every
function to know wether it'll submit to an lrc (everything in
intel_lrc.c) or wether it'll submit to a legacy ring (existing
code), so I don't see a need for an add_request vfunc.

The honest, hairy truth is that this patch is the glue keeping
the whole logical ring puzzle together:

- i915_add_request is used by intel_ring_idle, which in turn is
  used by i915_gpu_idle, which in turn is used in several places
  inside the eviction and gtt codes.
- Also, it is used by i915_gem_check_olr, which is littered all
  over i915_gem.c
- ...

If I were to duplicate all the code that directly or indirectly
uses __i915_add_request, I'll end up creating a separate driver.

To show the differences between the existing legacy version and
the new Execlists one, this time I have special-cased
__i915_add_request instead of adding an add_request vfunc. I
hope this helps to untangle this Gordian knot.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c  |   72 --
 drivers/gpu/drm/i915/intel_lrc.c |   30 +---
 drivers/gpu/drm/i915/intel_lrc.h |1 +
 3 files changed, 80 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9560b40..1c83b9c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2327,10 +2327,21 @@ int __i915_add_request(struct intel_engine_cs *ring,
 {
struct drm_i915_private *dev_priv = ring-dev-dev_private;
struct drm_i915_gem_request *request;
+   struct intel_ringbuffer *ringbuf;
u32 request_ring_position, request_start;
int ret;
 
-   request_start = intel_ring_get_tail(ring-buffer);
+   request = ring-preallocated_lazy_request;
+   if (WARN_ON(request == NULL))
+   return -ENOMEM;
+
+   if (i915.enable_execlists) {
+   struct intel_context *ctx = request-ctx;
+   ringbuf = ctx-engine[ring-id].ringbuf;
+   } else
+   ringbuf = ring-buffer;
+
+   request_start = intel_ring_get_tail(ringbuf);
/*
 * Emit any outstanding flushes - execbuf can fail to emit the flush
 * after having emitted the batchbuffer command. Hence we need to fix
@@ -2338,24 +2349,32 @@ int __i915_add_request(struct intel_engine_cs *ring,
 * is that the flush _must_ happen before the next request, no matter
 * what.
 */
-   ret = intel_ring_flush_all_caches(ring);
-   if (ret)
-   return ret;
-
-   request = ring-preallocated_lazy_request;
-   if (WARN_ON(request == NULL))
-   return -ENOMEM;
+   if (i915.enable_execlists) {
+   ret = logical_ring_flush_all_caches(ringbuf);
+   if (ret)
+   return ret;
+   } else {
+   ret = intel_ring_flush_all_caches(ring);
+   if (ret)
+   return ret;
+   }
 
/* Record the position of the start of the request so that
 * should we detect the updated seqno part-way through the
 * GPU processing the request, we never over-estimate the
 * position of the head.
 */
-   request_ring_position = intel_ring_get_tail(ring-buffer);
+   request_ring_position = intel_ring_get_tail(ringbuf);
 
-   ret = ring-add_request(ring);
-   if (ret)
-   return ret;
+   if (i915.enable_execlists) {
+   ret = ring-emit_request(ringbuf);
+   if (ret)
+   return ret;
+   } else {
+   ret = ring-add_request(ring);
+   if (ret)
+   return ret;
+   }
 
request-seqno = intel_ring_get_seqno(ring);
request-ring = ring;
@@ -2370,12 +2389,14 @@ int __i915_add_request(struct intel_engine_cs *ring,
 */
request-batch_obj = obj;
 
-   /* Hold a reference to the current context so that we can inspect
-* it later in case a hangcheck error event fires.
-*/
-   request-ctx = ring-last_context;
-   if (request-ctx)
-   i915_gem_context_reference(request-ctx);
+   if (!i915.enable_execlists) {
+   /* Hold a reference to the current context so that we can 
inspect
+* it later in case a hangcheck error event fires.
+*/
+   request-ctx = ring-last_context;
+   if (request-ctx)
+   i915_gem_context_reference(request-ctx);
+   }
 
request-emitted_jiffies = jiffies;
list_add_tail(request-list, ring-request_list);
@@ -2630,6 +2651,7 @@ 

[Intel-gfx] [PATCH 14/43] drm/i915/bdw: Skeleton for the new logical rings submission path

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Execlists are indeed a brave new world with respect to workload
submission to the GPU.

In previous versions of this series, I have tried to impact the
legacy ringbuffer submission path as little as possible (mostly,
passing the context around and using the correct ringbuffer when I
needed one) but Daniel is afraid (probably with a reason) that
these changes and, especially, future ones, will end up breaking
older gens.

This commit and some others coming next will try to limit the
damage by creating an alternative path for workload submission.
The first step is here: laying out a new ring init/fini.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c  |5 ++
 drivers/gpu/drm/i915/intel_lrc.c |  151 ++
 drivers/gpu/drm/i915/intel_lrc.h |   12 +++
 3 files changed, 168 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6544286..9560b40 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4764,6 +4764,11 @@ int i915_gem_init(struct drm_device *dev)
dev_priv-gt.init_rings = i915_gem_init_rings;
dev_priv-gt.cleanup_ring = intel_cleanup_ring_buffer;
dev_priv-gt.stop_ring = intel_stop_ring_buffer;
+   } else {
+   dev_priv-gt.do_execbuf = intel_execlists_submission;
+   dev_priv-gt.init_rings = intel_logical_rings_init;
+   dev_priv-gt.cleanup_ring = intel_logical_ring_cleanup;
+   dev_priv-gt.stop_ring = intel_logical_ring_stop;
}
 
i915_gem_init_userptr(dev);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cf322ec..cb56bb8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -89,6 +89,157 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, 
int enable_execlists
return 0;
 }
 
+int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
+  struct intel_engine_cs *ring,
+  struct intel_context *ctx,
+  struct drm_i915_gem_execbuffer2 *args,
+  struct list_head *vmas,
+  struct drm_i915_gem_object *batch_obj,
+  u64 exec_start, u32 flags)
+{
+   /* TODO */
+   return 0;
+}
+
+void intel_logical_ring_stop(struct intel_engine_cs *ring)
+{
+   /* TODO */
+}
+
+void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
+{
+   /* TODO */
+}
+
+static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs 
*ring)
+{
+   /* TODO */
+   return 0;
+}
+
+static int logical_render_ring_init(struct drm_device *dev)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_engine_cs *ring = dev_priv-ring[RCS];
+
+   ring-name = render ring;
+   ring-id = RCS;
+   ring-mmio_base = RENDER_RING_BASE;
+   ring-irq_enable_mask =
+   GT_RENDER_USER_INTERRUPT  GEN8_RCS_IRQ_SHIFT;
+
+   return logical_ring_init(dev, ring);
+}
+
+static int logical_bsd_ring_init(struct drm_device *dev)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_engine_cs *ring = dev_priv-ring[VCS];
+
+   ring-name = bsd ring;
+   ring-id = VCS;
+   ring-mmio_base = GEN6_BSD_RING_BASE;
+   ring-irq_enable_mask =
+   GT_RENDER_USER_INTERRUPT  GEN8_VCS1_IRQ_SHIFT;
+
+   return logical_ring_init(dev, ring);
+}
+
+static int logical_bsd2_ring_init(struct drm_device *dev)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_engine_cs *ring = dev_priv-ring[VCS2];
+
+   ring-name = bds2 ring;
+   ring-id = VCS2;
+   ring-mmio_base = GEN8_BSD2_RING_BASE;
+   ring-irq_enable_mask =
+   GT_RENDER_USER_INTERRUPT  GEN8_VCS2_IRQ_SHIFT;
+
+   return logical_ring_init(dev, ring);
+}
+
+static int logical_blt_ring_init(struct drm_device *dev)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_engine_cs *ring = dev_priv-ring[BCS];
+
+   ring-name = blitter ring;
+   ring-id = BCS;
+   ring-mmio_base = BLT_RING_BASE;
+   ring-irq_enable_mask =
+   GT_RENDER_USER_INTERRUPT  GEN8_BCS_IRQ_SHIFT;
+
+   return logical_ring_init(dev, ring);
+}
+
+static int logical_vebox_ring_init(struct drm_device *dev)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_engine_cs *ring = dev_priv-ring[VECS];
+
+   ring-name = video enhancement ring;
+   ring-id = VECS;
+   ring-mmio_base = VEBOX_RING_BASE;
+   ring-irq_enable_mask =
+   GT_RENDER_USER_INTERRUPT  GEN8_VECS_IRQ_SHIFT;
+
+   return logical_ring_init(dev, ring);
+}
+
+int 

[Intel-gfx] [PATCH 29/43] drm/i915/bdw: Write the tail pointer, LRC style

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Each logical ring context has the tail pointer in the context object,
so update it before submission.

v2: New namespace.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c |   19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 535ef98..5b6f416 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -176,6 +176,21 @@ static void execlists_elsp_write(struct intel_engine_cs 
*ring,
gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 }
 
+static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 
tail)
+{
+   struct page *page;
+   uint32_t *reg_state;
+
+   page = i915_gem_object_get_page(ctx_obj, 1);
+   reg_state = kmap_atomic(page);
+
+   reg_state[CTX_RING_TAIL+1] = tail;
+
+   kunmap_atomic(reg_state);
+
+   return 0;
+}
+
 static int execlists_submit_context(struct intel_engine_cs *ring,
struct intel_context *to0, u32 tail0,
struct intel_context *to1, u32 tail1)
@@ -187,10 +202,14 @@ static int execlists_submit_context(struct 
intel_engine_cs *ring,
BUG_ON(!ctx_obj0);
BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0));
 
+   execlists_ctx_write_tail(ctx_obj0, tail0);
+
if (to1) {
ctx_obj1 = to1-engine[ring-id].state;
BUG_ON(!ctx_obj1);
BUG_ON(!i915_gem_obj_is_pinned(ctx_obj1));
+
+   execlists_ctx_write_tail(ctx_obj1, tail1);
}
 
execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 25/43] drm/i915/bdw: Workload submission mechanism for Execlists

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

This is what i915_gem_do_execbuffer calls when it wants to execute some
workload in an Execlists world.

v2: Check arguments before doing stuff in intel_execlists_submission. Also,
get rel_constants parsing right.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h|6 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |4 +-
 drivers/gpu/drm/i915/intel_lrc.c   |  130 +++-
 3 files changed, 137 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1caed52..4303e2c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2239,6 +2239,12 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, 
void *data,
  struct drm_file *file_priv);
 int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_priv);
+void i915_gem_execbuffer_move_to_active(struct list_head *vmas,
+   struct intel_engine_cs *ring);
+void i915_gem_execbuffer_retire_commands(struct drm_device *dev,
+struct drm_file *file,
+struct intel_engine_cs *ring,
+struct drm_i915_gem_object *obj);
 int i915_gem_ringbuffer_submission(struct drm_device *dev,
   struct drm_file *file,
   struct intel_engine_cs *ring,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8c63d79..cae7df8 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -962,7 +962,7 @@ i915_gem_validate_context(struct drm_device *dev, struct 
drm_file *file,
return ctx;
 }
 
-static void
+void
 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
   struct intel_engine_cs *ring)
 {
@@ -994,7 +994,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
}
 }
 
-static void
+void
 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
struct drm_file *file,
struct intel_engine_cs *ring,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 55ee8dd..cd834b3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -89,6 +89,57 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, 
int enable_execlists
return 0;
 }
 
+static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
+{
+   struct intel_engine_cs *ring = ringbuf-ring;
+   uint32_t flush_domains;
+   int ret;
+
+   flush_domains = 0;
+   if (ring-gpu_caches_dirty)
+   flush_domains = I915_GEM_GPU_DOMAINS;
+
+   ret = ring-emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
+   if (ret)
+   return ret;
+
+   ring-gpu_caches_dirty = false;
+   return 0;
+}
+
+static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
+struct list_head *vmas)
+{
+   struct intel_engine_cs *ring = ringbuf-ring;
+   struct i915_vma *vma;
+   uint32_t flush_domains = 0;
+   bool flush_chipset = false;
+   int ret;
+
+   list_for_each_entry(vma, vmas, exec_list) {
+   struct drm_i915_gem_object *obj = vma-obj;
+   ret = i915_gem_object_sync(obj, ring);
+   if (ret)
+   return ret;
+
+   if (obj-base.write_domain  I915_GEM_DOMAIN_CPU)
+   flush_chipset |= i915_gem_clflush_object(obj, false);
+
+   flush_domains |= obj-base.write_domain;
+   }
+
+   if (flush_chipset)
+   i915_gem_chipset_flush(ring-dev);
+
+   if (flush_domains  I915_GEM_DOMAIN_GTT)
+   wmb();
+
+   /* Unconditionally invalidate gpu caches and ensure that we do flush
+* any residual writes from the previous batch.
+*/
+   return logical_ring_invalidate_all_caches(ringbuf);
+}
+
 int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
   struct intel_engine_cs *ring,
   struct intel_context *ctx,
@@ -97,7 +148,84 @@ int intel_execlists_submission(struct drm_device *dev, 
struct drm_file *file,
   struct drm_i915_gem_object *batch_obj,
   u64 exec_start, u32 flags)
 {
-   /* TODO */
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_ringbuffer *ringbuf = ctx-engine[ring-id].ringbuf;
+   int instp_mode;
+   u32 instp_mask;
+   int ret;
+
+   instp_mode = args-flags  

[Intel-gfx] [PATCH 22/43] drm/i915/bdw: Ring idle and stop with logical rings

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

This is a hard one, since there is no direct hardware ring to
control when in Execlists.

We reuse intel_ring_idle here, but it should be fine as long
as i915_add_request does the ring thing.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c |   24 ++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index dcf59c6..c30518c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -103,7 +103,24 @@ int intel_execlists_submission(struct drm_device *dev, 
struct drm_file *file,
 
 void intel_logical_ring_stop(struct intel_engine_cs *ring)
 {
-   /* TODO */
+   struct drm_i915_private *dev_priv = ring-dev-dev_private;
+   int ret;
+
+   if (!intel_ring_initialized(ring))
+   return;
+
+   ret = intel_ring_idle(ring);
+   if (ret  !i915_reset_in_progress(to_i915(ring-dev)-gpu_error))
+   DRM_ERROR(failed to quiesce %s whilst cleaning up: %d\n,
+ ring-name, ret);
+
+   /* TODO: Is this correct with Execlists enabled? */
+   I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
+   if (wait_for_atomic((I915_READ_MODE(ring)  MODE_IDLE) != 0, 1000)) {
+   DRM_ERROR(%s :timed out trying to stop ring\n, ring-name);
+   return;
+   }
+   I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
 int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
@@ -479,10 +496,13 @@ static int gen8_emit_request(struct intel_ringbuffer 
*ringbuf)
 
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 {
+   struct drm_i915_private *dev_priv = ring-dev-dev_private;
+
if (!intel_ring_initialized(ring))
return;
 
-   /* TODO: make sure the ring is stopped */
+   intel_logical_ring_stop(ring);
+   WARN_ON((I915_READ_MODE(ring)  MODE_IDLE) == 0);
ring-preallocated_lazy_request = NULL;
ring-outstanding_lazy_seqno = 0;
 
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 28/43] drm/i915/bdw: Implement context switching (somewhat)

2014-07-24 Thread Thomas Daniel
From: Ben Widawsky benjamin.widaw...@intel.com

A context switch occurs by submitting a context descriptor to the
ExecList Submission Port. Given that we can now initialize a context,
it's possible to begin implementing the context switch by creating the
descriptor and submitting it to ELSP (actually two, since the ELSP
has two ports).

The context object must be mapped in the GGTT, which means it must exist
in the 0-4GB graphics VA range.

Signed-off-by: Ben Widawsky b...@bwidawsk.net

v2: This code has changed quite a lot in various rebases. Of particular
importance is that now we use the globally unique Submission ID to send
to the hardware. Also, context pages are now pinned unconditionally to
GGTT, so there is no need to bind them.

v3: Use LRCA[31:12] as hwCtxId[19:0]. This guarantees that the HW context
ID we submit to the ELSP is globally unique and != 0 (Bspec requirements
of the software use-only bits of the Context ID in the Context Descriptor
Format) without the hassle of the previous submission Id construction.
Also, re-add the ELSP porting read (it was dropped somewhere during the
rebases).

v4:
- Squash with drm/i915/bdw: Add forcewake lock around ELSP writes (BSPEC
  says: SW must set Force Wakeup bit to prevent GT from entering C6 while
  ELSP writes are in progress) as noted by Thomas Daniel
  (thomas.dan...@intel.com).
- Rename functions and use an execlists/intel_execlists_ namespace.
- The BUG_ON only checked that the LRCA was 32 bits, but it didn't make
  sure that it was properly aligned. Spotted by Alistair Mcaulay
  alistair.mcau...@intel.com.

v5:
- Improved source code comments as suggested by Chris Wilson.
- No need to abstract submit_ctx away, as pointed by Brad Volkin.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c |  116 +-
 drivers/gpu/drm/i915/intel_lrc.h |1 +
 2 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4549eec..535ef98 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -47,6 +47,7 @@
 #define GEN8_LR_CONTEXT_ALIGN 4096
 
 #define RING_ELSP(ring)((ring)-mmio_base+0x230)
+#define RING_EXECLIST_STATUS(ring) ((ring)-mmio_base+0x234)
 #define RING_CONTEXT_CONTROL(ring) ((ring)-mmio_base+0x244)
 
 #define CTX_LRI_HEADER_0   0x01
@@ -78,6 +79,26 @@
 #define CTX_R_PWR_CLK_STATE0x42
 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44
 
+#define GEN8_CTX_VALID (1<<0)
+#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
+#define GEN8_CTX_FORCE_RESTORE (1<<2)
+#define GEN8_CTX_L3LLC_COHERENT (1<<5)
+#define GEN8_CTX_PRIVILEGE (1<<8)
+enum {
+   ADVANCED_CONTEXT=0,
+   LEGACY_CONTEXT,
+   ADVANCED_AD_CONTEXT,
+   LEGACY_64B_CONTEXT
+};
+#define GEN8_CTX_MODE_SHIFT 3
+enum {
+   FAULT_AND_HANG=0,
+   FAULT_AND_HALT, /* Debug only */
+   FAULT_AND_STREAM,
+   FAULT_AND_CONTINUE /* Unsupported */
+};
+#define GEN8_CTX_ID_SHIFT 32
+
 int intel_sanitize_enable_execlists(struct drm_device *dev, int 
enable_execlists)
 {
if (enable_execlists == 0)
@@ -90,6 +111,93 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, 
int enable_execlists
return 0;
 }
 
+u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
+{
+   u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj);
+
+   /* LRCA is required to be 4K aligned so the more significant 20 bits
+* are globally unique */
+   return lrca >> 12;
+}
+
+static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
+{
+   uint64_t desc;
+   uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
+   BUG_ON(lrca  0x0FFFULL);
+
+   desc = GEN8_CTX_VALID;
+   desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
+   desc |= GEN8_CTX_L3LLC_COHERENT;
+   desc |= GEN8_CTX_PRIVILEGE;
+   desc |= lrca;
+   desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
+
+   /* TODO: WaDisableLiteRestore when we start using semaphore
+* signalling between Command Streamers */
+   /* desc |= GEN8_CTX_FORCE_RESTORE; */
+
+   return desc;
+}
+
+static void execlists_elsp_write(struct intel_engine_cs *ring,
+struct drm_i915_gem_object *ctx_obj0,
+struct drm_i915_gem_object *ctx_obj1)
+{
+   struct drm_i915_private *dev_priv = ring-dev-dev_private;
+   uint64_t temp = 0;
+   uint32_t desc[4];
+
+   /* XXX: You must always write both descriptors in the order below. */
+   if (ctx_obj1)
+   temp = execlists_ctx_descriptor(ctx_obj1);
+   else
+   temp = 0;
+   desc[1] = (u32)(temp >> 32);
+   desc[0] = (u32)temp;
+
+   temp = execlists_ctx_descriptor(ctx_obj0);
+   desc[3] = (u32)(temp >> 32);
+   desc[2] = (u32)temp;
+
+   /* Set Force Wakeup bit

[Intel-gfx] [PATCH 26/43] drm/i915/bdw: Always use MMIO flips with Execlists

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

The normal flip function places things in the ring in the legacy
way, so we either fix that or force MMIO flips always as we do in
this patch.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/intel_display.c |2 ++
 drivers/gpu/drm/i915/intel_lrc.c |3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 5ed6a1a..8129af4 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9482,6 +9482,8 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
return false;
else if (i915.use_mmio_flip  0)
return true;
+   else if (i915.enable_execlists)
+   return true;
else
return ring != obj-ring;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cd834b3..0a04c03 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -83,7 +83,8 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, 
int enable_execlists
if (enable_execlists == 0)
return 0;
 
-   if (HAS_LOGICAL_RING_CONTEXTS(dev)  USES_PPGTT(dev))
+   if (HAS_LOGICAL_RING_CONTEXTS(dev)  USES_PPGTT(dev) 
+   i915.use_mmio_flip = 0)
return 1;
 
return 0;
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 41/43] drm/i915/bdw: Enable Logical Ring Contexts (hence, Execlists)

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

"The time has come," the Walrus said, "to talk of many things."

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b7cf0ec..1ce51d6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2061,7 +2061,7 @@ struct drm_i915_cmd_table {
 #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)-need_gfx_hws)
 
 #define HAS_HW_CONTEXTS(dev)   (INTEL_INFO(dev)-gen = 6)
-#define HAS_LOGICAL_RING_CONTEXTS(dev) 0
+#define HAS_LOGICAL_RING_CONTEXTS(dev) (INTEL_INFO(dev)-gen = 8)
 #define HAS_ALIASING_PPGTT(dev)(INTEL_INFO(dev)-gen = 6)
 #define HAS_PPGTT(dev) (INTEL_INFO(dev)-gen = 7  !IS_GEN8(dev))
 #define USES_PPGTT(dev)intel_enable_ppgtt(dev, false)
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 23/43] drm/i915/bdw: Interrupts with logical rings

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

We need to attend context switch interrupts from all rings. Also, fixed writing
IMR/IER and added HWSTAM at ring init time.

Notice that, if added to irq_enable_mask, the context switch interrupts would
be incorrectly masked out when the user interrupts are due to no users waiting
on a sequence number. Therefore, this commit adds a bitmask of interrupts to
be kept unmasked at all times.

v2: Disable HWSTAM, as suggested by Damien (nobody listens to these interrupts,
anyway).

v3: Add new get/put_irq functions.

Signed-off-by: Thomas Daniel thomas.dan...@intel.com (v1)
Signed-off-by: Oscar Mateo oscar.ma...@intel.com (v2  v3)
---
 drivers/gpu/drm/i915/i915_irq.c |   19 --
 drivers/gpu/drm/i915/i915_reg.h |3 ++
 drivers/gpu/drm/i915/intel_lrc.c|   58 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h |1 +
 4 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a38b5c3..f77a4ca 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1643,6 +1643,8 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device 
*dev,
notify_ring(dev, dev_priv-ring[RCS]);
if (bcs  GT_RENDER_USER_INTERRUPT)
notify_ring(dev, dev_priv-ring[BCS]);
+   if ((rcs | bcs)  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
+   DRM_DEBUG_DRIVER(TODO: Context switch\n);
} else
DRM_ERROR(The master control interrupt lied (GT0)!\n);
}
@@ -1655,9 +1657,13 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device 
*dev,
vcs = tmp  GEN8_VCS1_IRQ_SHIFT;
if (vcs  GT_RENDER_USER_INTERRUPT)
notify_ring(dev, dev_priv-ring[VCS]);
+   if (vcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
+   DRM_DEBUG_DRIVER(TODO: Context switch\n);
vcs = tmp  GEN8_VCS2_IRQ_SHIFT;
if (vcs  GT_RENDER_USER_INTERRUPT)
notify_ring(dev, dev_priv-ring[VCS2]);
+   if (vcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
+   DRM_DEBUG_DRIVER(TODO: Context switch\n);
} else
DRM_ERROR(The master control interrupt lied (GT1)!\n);
}
@@ -1681,6 +1687,8 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device 
*dev,
vcs = tmp  GEN8_VECS_IRQ_SHIFT;
if (vcs  GT_RENDER_USER_INTERRUPT)
notify_ring(dev, dev_priv-ring[VECS]);
+   if (vcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
+   DRM_DEBUG_DRIVER(TODO: Context switch\n);
} else
DRM_ERROR(The master control interrupt lied (GT3)!\n);
}
@@ -3768,12 +3776,17 @@ static void gen8_gt_irq_postinstall(struct 
drm_i915_private *dev_priv)
/* These are interrupts we'll toggle with the ring mask register */
uint32_t gt_interrupts[] = {
GT_RENDER_USER_INTERRUPT  GEN8_RCS_IRQ_SHIFT |
+   GEN8_GT_CONTEXT_SWITCH_INTERRUPT  GEN8_RCS_IRQ_SHIFT |
GT_RENDER_L3_PARITY_ERROR_INTERRUPT |
-   GT_RENDER_USER_INTERRUPT  GEN8_BCS_IRQ_SHIFT,
+   GT_RENDER_USER_INTERRUPT  GEN8_BCS_IRQ_SHIFT |
+   GEN8_GT_CONTEXT_SWITCH_INTERRUPT  GEN8_BCS_IRQ_SHIFT,
GT_RENDER_USER_INTERRUPT  GEN8_VCS1_IRQ_SHIFT |
-   GT_RENDER_USER_INTERRUPT  GEN8_VCS2_IRQ_SHIFT,
+   GEN8_GT_CONTEXT_SWITCH_INTERRUPT  GEN8_VCS1_IRQ_SHIFT 
|
+   GT_RENDER_USER_INTERRUPT  GEN8_VCS2_IRQ_SHIFT |
+   GEN8_GT_CONTEXT_SWITCH_INTERRUPT  GEN8_VCS2_IRQ_SHIFT,
0,
-   GT_RENDER_USER_INTERRUPT  GEN8_VECS_IRQ_SHIFT
+   GT_RENDER_USER_INTERRUPT  GEN8_VECS_IRQ_SHIFT |
+   GEN8_GT_CONTEXT_SWITCH_INTERRUPT  GEN8_VECS_IRQ_SHIFT
};
 
for (i = 0; i  ARRAY_SIZE(gt_interrupts); i++)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 70dddac..bfc0c01 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1062,6 +1062,7 @@ enum punit_power_well {
 #define RING_ACTHD_UDW(base)   ((base)+0x5c)
 #define RING_NOPID(base)   ((base)+0x94)
 #define RING_IMR(base) ((base)+0xa8)
+#define RING_HWSTAM(base)  ((base)+0x98)
 #define RING_TIMESTAMP(base)   ((base)+0x358)
 #define   TAIL_ADDR0x0018
 #define   HEAD_WRAP_COUNT  0xFFE0
@@ -4590,6 +4591,8 @@ enum punit_power_well {
 #define GEN8_GT_IIR(which

[Intel-gfx] [PATCH 17/43] drm/i915/bdw: GEN-specific logical ring set/get seqno

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

No mystery here: the seqno is still retrieved from the engine's
HW status page (the one in the default context). For the moment,
I see no reason to worry about other contexts' HWS pages.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c |   20 
 1 file changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7c8b75e..f171fd5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -149,6 +149,16 @@ static int gen8_init_render_ring(struct intel_engine_cs 
*ring)
return ret;
 }
 
+static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
+{
+   return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
+}
+
+static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
+{
+   intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
+}
+
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 {
if (!intel_ring_initialized(ring))
@@ -221,6 +231,8 @@ static int logical_render_ring_init(struct drm_device *dev)
 
ring-init = gen8_init_render_ring;
ring-cleanup = intel_fini_pipe_control;
+   ring-get_seqno = gen8_get_seqno;
+   ring-set_seqno = gen8_set_seqno;
 
return logical_ring_init(dev, ring);
 }
@@ -237,6 +249,8 @@ static int logical_bsd_ring_init(struct drm_device *dev)
GT_RENDER_USER_INTERRUPT  GEN8_VCS1_IRQ_SHIFT;
 
ring-init = gen8_init_common_ring;
+   ring-get_seqno = gen8_get_seqno;
+   ring-set_seqno = gen8_set_seqno;
 
return logical_ring_init(dev, ring);
 }
@@ -253,6 +267,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
GT_RENDER_USER_INTERRUPT  GEN8_VCS2_IRQ_SHIFT;
 
ring-init = gen8_init_common_ring;
+   ring-get_seqno = gen8_get_seqno;
+   ring-set_seqno = gen8_set_seqno;
 
return logical_ring_init(dev, ring);
 }
@@ -269,6 +285,8 @@ static int logical_blt_ring_init(struct drm_device *dev)
GT_RENDER_USER_INTERRUPT  GEN8_BCS_IRQ_SHIFT;
 
ring-init = gen8_init_common_ring;
+   ring-get_seqno = gen8_get_seqno;
+   ring-set_seqno = gen8_set_seqno;
 
return logical_ring_init(dev, ring);
 }
@@ -285,6 +303,8 @@ static int logical_vebox_ring_init(struct drm_device *dev)
GT_RENDER_USER_INTERRUPT  GEN8_VECS_IRQ_SHIFT;
 
ring-init = gen8_init_common_ring;
+   ring-get_seqno = gen8_get_seqno;
+   ring-set_seqno = gen8_set_seqno;
 
return logical_ring_init(dev, ring);
 }
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 39/43] drm/i915/bdw: Print context state in debugfs

2014-07-24 Thread Thomas Daniel
From: Ben Widawsky b...@bwidawsk.net

This has turned out to be really handy in debug so far.

Update:
Since writing this patch, I've gotten similar code upstream for error
state. I've used it quite a bit in debugfs however, and I'd like to keep
it here at least until preemption is working.

Signed-off-by: Ben Widawsky b...@bwidawsk.net

This patch was accidentally dropped in the first Execlists version, and
it has been very useful indeed. Put it back again, but as a standalone
debugfs file.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c |   52 +++
 1 file changed, 52 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 0980cdd..968c3c0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1695,6 +1695,57 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
return 0;
 }
 
+static int i915_dump_lrc(struct seq_file *m, void *unused)
+{
+   struct drm_info_node *node = (struct drm_info_node *) m-private;
+   struct drm_device *dev = node-minor-dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_engine_cs *ring;
+   struct intel_context *ctx;
+   int ret, i;
+
+   if (!i915.enable_execlists) {
+   seq_printf(m, Logical Ring Contexts are disabled\n);
+   return 0;
+   }
+
+   ret = mutex_lock_interruptible(dev-mode_config.mutex);
+   if (ret)
+   return ret;
+
+   list_for_each_entry(ctx, dev_priv-context_list, link) {
+   for_each_ring(ring, dev_priv, i) {
+   struct drm_i915_gem_object *ctx_obj = 
ctx-engine[i].state;
+
+   if (ring-default_context == ctx)
+   continue;
+
+   if (ctx_obj) {
+   struct page *page = 
i915_gem_object_get_page(ctx_obj, 1);
+   uint32_t *reg_state = kmap_atomic(page);
+   int j;
+
+   seq_printf(m, CONTEXT: %s %u\n, ring-name,
+   
intel_execlists_ctx_id(ctx_obj));
+
+   for (j = 0; j  0x600 / sizeof(u32) / 4; j += 
4) {
+   seq_printf(m, \t[0x%08lx] 0x%08x 
0x%08x 0x%08x 0x%08x\n,
+   i915_gem_obj_ggtt_offset(ctx_obj) + 
4096 + (j * 4),
+   reg_state[j], reg_state[j + 1],
+   reg_state[j + 2], reg_state[j + 3]);
+   }
+   kunmap_atomic(reg_state);
+
+   seq_putc(m, '\n');
+   }
+   }
+   }
+
+   mutex_unlock(dev-mode_config.mutex);
+
+   return 0;
+}
+
 static int i915_execlists(struct seq_file *m, void *data)
 {
struct drm_info_node *node = (struct drm_info_node *) m-private;
@@ -3992,6 +4043,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{i915_opregion, i915_opregion, 0},
{i915_gem_framebuffer, i915_gem_framebuffer_info, 0},
{i915_context_status, i915_context_status, 0},
+   {i915_dump_lrc, i915_dump_lrc, 0},
{i915_execlists, i915_execlists, 0},
{i915_gen6_forcewake_count, i915_gen6_forcewake_count_info, 0},
{i915_swizzle_info, i915_swizzle_info, 0},
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 35/43] drm/i915/bdw: Make sure error capture keeps working with Execlists

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Since the ringbuffer does not belong per engine anymore, we have to
make sure that we are always recording the correct ringbuffer.

TODO: This is only a small fix to keep basic error capture working, but
we need to add more information for it to be useful (e.g. dump the
context being executed).

v2: Reorder how the ringbuffer is chosen to clarify the change and
rename the variable, both changes suggested by Chris Wilson. Also,
add the TODO comment to the code, as suggested by Daniel.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_gpu_error.c |   22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 45b6191..1e38576 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -874,9 +874,6 @@ static void i915_record_ring_state(struct drm_device *dev,
ering-hws = I915_READ(mmio);
}
 
-   ering-cpu_ring_head = ring-buffer-head;
-   ering-cpu_ring_tail = ring-buffer-tail;
-
ering-hangcheck_score = ring-hangcheck.score;
ering-hangcheck_action = ring-hangcheck.action;
 
@@ -936,6 +933,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 
for (i = 0; i  I915_NUM_RINGS; i++) {
struct intel_engine_cs *ring = dev_priv-ring[i];
+   struct intel_ringbuffer *rbuf;
 
error-ring[i].pid = -1;
 
@@ -979,8 +977,24 @@ static void i915_gem_record_rings(struct drm_device *dev,
}
}
 
+   if (i915.enable_execlists) {
+   /* TODO: This is only a small fix to keep basic error
+* capture working, but we need to add more information
+* for it to be useful (e.g. dump the context being
+* executed).
+*/
+   if (request)
+   rbuf = request-ctx-engine[ring-id].ringbuf;
+   else
+   rbuf = 
ring-default_context-engine[ring-id].ringbuf;
+   } else
+   rbuf = ring-buffer;
+
+   error-ring[i].cpu_ring_head = rbuf-head;
+   error-ring[i].cpu_ring_tail = rbuf-tail;
+
error-ring[i].ringbuffer =
-   i915_error_ggtt_object_create(dev_priv, 
ring-buffer-obj);
+   i915_error_ggtt_object_create(dev_priv, rbuf-obj);
 
if (ring-status_page.obj)
error-ring[i].hws_page =
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 30/43] drm/i915/bdw: Two-stage execlist submit process

2014-07-24 Thread Thomas Daniel
From: Michel Thierry michel.thie...@intel.com

Context switch (and execlist submission) should happen only when
other contexts are not active, otherwise pre-emption occurs.

To assure this, we place context switch requests in a queue and those
requests are later consumed when the right context switch interrupt is
received (still TODO).

v2: Use a spinlock, do not remove the requests on unqueue (wait for
context switch completion).

Signed-off-by: Thomas Daniel thomas.dan...@intel.com

v3: Several rebases and code changes. Use unique ID.

v4:
- Move the queue/lock init to the late ring initialization.
- Damien's kmalloc review comments: check return, use sizeof(*req),
do not cast.

v5:
- Do not reuse drm_i915_gem_request. Instead, create our own.
- New namespace.

Signed-off-by: Michel Thierry michel.thie...@intel.com (v1)
Signed-off-by: Oscar Mateo oscar.ma...@intel.com (v2-v5)
---
 drivers/gpu/drm/i915/intel_lrc.c|   63 ++-
 drivers/gpu/drm/i915/intel_lrc.h|8 
 drivers/gpu/drm/i915/intel_ringbuffer.h |2 +
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5b6f416..9e91169 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -217,6 +217,63 @@ static int execlists_submit_context(struct intel_engine_cs 
*ring,
return 0;
 }
 
+static void execlists_context_unqueue(struct intel_engine_cs *ring)
+{
+   struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
+   struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
+
+   if (list_empty(ring-execlist_queue))
+   return;
+
+   /* Try to read in pairs */
+   list_for_each_entry_safe(cursor, tmp, ring-execlist_queue, 
execlist_link) {
+   if (!req0)
+   req0 = cursor;
+   else if (req0-ctx == cursor-ctx) {
+   /* Same ctx: ignore first request, as second request
+* will update tail past first request's workload */
+   list_del(req0-execlist_link);
+   i915_gem_context_unreference(req0-ctx);
+   kfree(req0);
+   req0 = cursor;
+   } else {
+   req1 = cursor;
+   break;
+   }
+   }
+
+   BUG_ON(execlists_submit_context(ring, req0-ctx, req0-tail,
+   req1? req1-ctx : NULL, req1? req1-tail : 0));
+}
+
+static int execlists_context_queue(struct intel_engine_cs *ring,
+  struct intel_context *to,
+  u32 tail)
+{
+   struct intel_ctx_submit_request *req = NULL;
+   unsigned long flags;
+   bool was_empty;
+
+   req = kzalloc(sizeof(*req), GFP_KERNEL);
+   if (req == NULL)
+   return -ENOMEM;
+   req-ctx = to;
+   i915_gem_context_reference(req-ctx);
+   req-ring = ring;
+   req-tail = tail;
+
+   spin_lock_irqsave(ring-execlist_lock, flags);
+
+   was_empty = list_empty(ring-execlist_queue);
+   list_add_tail(req-execlist_link, ring-execlist_queue);
+   if (was_empty)
+   execlists_context_unqueue(ring);
+
+   spin_unlock_irqrestore(ring-execlist_lock, flags);
+
+   return 0;
+}
+
 static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
 {
struct intel_engine_cs *ring = ringbuf-ring;
@@ -405,8 +462,7 @@ void intel_logical_ring_advance_and_submit(struct 
intel_ringbuffer *ringbuf)
if (intel_ring_stopped(ring))
return;
 
-   /* FIXME: too cheeky, we don't even check if the ELSP is ready */
-   execlists_submit_context(ring, ctx, ringbuf-tail, NULL, 0);
+   execlists_context_queue(ring, ctx, ringbuf-tail);
 }
 
 static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
@@ -850,6 +906,9 @@ static int logical_ring_init(struct drm_device *dev, struct 
intel_engine_cs *rin
INIT_LIST_HEAD(ring-request_list);
init_waitqueue_head(ring-irq_queue);
 
+   INIT_LIST_HEAD(ring-execlist_queue);
+   spin_lock_init(ring-execlist_lock);
+
ret = intel_lr_context_deferred_create(dctx, ring);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index b59965b..14492a9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -60,4 +60,12 @@ int intel_execlists_submission(struct drm_device *dev, 
struct drm_file *file,
   u64 exec_start, u32 flags);
 u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
 
+struct intel_ctx_submit_request {
+   struct intel_context *ctx;
+   struct intel_engine_cs *ring;
+   u32 tail;
+
+   struct list_head execlist_link;
+};
+
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers

[Intel-gfx] [PATCH 31/43] drm/i915/bdw: Handle context switch events

2014-07-24 Thread Thomas Daniel
Handle all context status events in the context status buffer on every
context switch interrupt. We only remove work from the execlist queue
after a context status buffer reports that it has completed and we only
attempt to schedule new contexts on interrupt when a previously submitted
context completes (unless no contexts are queued, which means the GPU is
free).

We cannot call intel_runtime_pm_get() in an interrupt (or with a spinlock
grabbed, FWIW), because it might sleep, which is not a nice thing to do.
Instead, do the runtime_pm get/put together with the create/destroy request,
and handle the forcewake get/put directly.

Signed-off-by: Thomas Daniel thomas.dan...@intel.com

v2: Unreferencing the context when we are freeing the request might free
the backing bo, which requires the struct_mutex to be grabbed, so defer
unreferencing and freeing to a bottom half.

v3:
- Ack the interrupt immediately, before trying to handle it (fix for
missing interrupts by Bob Beckett robert.beck...@intel.com).
- Update the Context Status Buffer Read Pointer, just in case (spotted
by Damien Lespiau).

v4: New namespace and multiple rebase changes.

v5: Squash with drm/i915/bdw: Do not call intel_runtime_pm_get() in an
interrupt, as suggested by Daniel.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_irq.c |   35 ++---
 drivers/gpu/drm/i915/intel_lrc.c|  129 +--
 drivers/gpu/drm/i915/intel_lrc.h|3 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |1 +
 4 files changed, 151 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f77a4ca..e4077d1 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1628,6 +1628,7 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device 
*dev,
   struct drm_i915_private *dev_priv,
   u32 master_ctl)
 {
+   struct intel_engine_cs *ring;
u32 rcs, bcs, vcs;
uint32_t tmp = 0;
irqreturn_t ret = IRQ_NONE;
@@ -1637,14 +1638,20 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_device *dev,
if (tmp) {
I915_WRITE(GEN8_GT_IIR(0), tmp);
ret = IRQ_HANDLED;
+
rcs = tmp  GEN8_RCS_IRQ_SHIFT;
-   bcs = tmp  GEN8_BCS_IRQ_SHIFT;
+   ring = dev_priv-ring[RCS];
if (rcs  GT_RENDER_USER_INTERRUPT)
-   notify_ring(dev, dev_priv-ring[RCS]);
+   notify_ring(dev, ring);
+   if (rcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
+   intel_execlists_handle_ctx_events(ring);
+
+   bcs = tmp  GEN8_BCS_IRQ_SHIFT;
+   ring = dev_priv-ring[BCS];
if (bcs  GT_RENDER_USER_INTERRUPT)
-   notify_ring(dev, dev_priv-ring[BCS]);
-   if ((rcs | bcs)  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
-   DRM_DEBUG_DRIVER(TODO: Context switch\n);
+   notify_ring(dev, ring);
+   if (bcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
+   intel_execlists_handle_ctx_events(ring);
} else
DRM_ERROR(The master control interrupt lied (GT0)!\n);
}
@@ -1654,16 +1661,20 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_device *dev,
if (tmp) {
I915_WRITE(GEN8_GT_IIR(1), tmp);
ret = IRQ_HANDLED;
+
vcs = tmp  GEN8_VCS1_IRQ_SHIFT;
+   ring = dev_priv-ring[VCS];
if (vcs  GT_RENDER_USER_INTERRUPT)
-   notify_ring(dev, dev_priv-ring[VCS]);
+   notify_ring(dev, ring);
if (vcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
-   DRM_DEBUG_DRIVER(TODO: Context switch\n);
+   intel_execlists_handle_ctx_events(ring);
+
vcs = tmp  GEN8_VCS2_IRQ_SHIFT;
+   ring = dev_priv-ring[VCS2];
if (vcs  GT_RENDER_USER_INTERRUPT)
-   notify_ring(dev, dev_priv-ring[VCS2]);
+   notify_ring(dev, ring);
if (vcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
-   DRM_DEBUG_DRIVER(TODO: Context switch\n);
+   intel_execlists_handle_ctx_events(ring);
} else
DRM_ERROR(The master control interrupt lied (GT1)!\n);
}
@@ -1684,11 +1695,13 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_device *dev,
if (tmp

[Intel-gfx] [PATCH 19/43] drm/i915/bdw: GEN-specific logical ring emit request

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Very similar to the legacy add_request, only modified to account for
logical ringbuffer.

v2: Use MI_GLOBAL_GTT, as suggested by Brad Volkin.

v3: Unify render and non-render in the same function, as noticed by
Brad Volkin.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_reg.h |1 +
 drivers/gpu/drm/i915/intel_lrc.c|   31 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h |3 +++
 3 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 043a6ea..70dddac 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -272,6 +272,7 @@
 #define   MI_SEMAPHORE_POLL(115)
 #define   MI_SEMAPHORE_SAD_GTE_SDD (112)
 #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
+#define MI_STORE_DWORD_IMM_GEN8	MI_INSTR(0x20, 2)
 #define   MI_MEM_VIRTUAL   (1  22) /* 965+ only */
 #define MI_STORE_DWORD_INDEX   MI_INSTR(0x21, 1)
 #define   MI_STORE_DWORD_INDEX_SHIFT 2
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index bd37d51..64bda7a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -352,6 +352,32 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, 
u32 seqno)
intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
 }
 
+static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
+{
+   struct intel_engine_cs *ring = ringbuf-ring;
+   u32 cmd;
+   int ret;
+
+   ret = intel_logical_ring_begin(ringbuf, 6);
+   if (ret)
+   return ret;
+
+   cmd = MI_STORE_DWORD_IMM_GEN8;
+   cmd |= MI_GLOBAL_GTT;
+
+   intel_logical_ring_emit(ringbuf, cmd);
+   intel_logical_ring_emit(ringbuf,
+   (ring-status_page.gfx_addr +
+   (I915_GEM_HWS_INDEX  
MI_STORE_DWORD_INDEX_SHIFT)));
+   intel_logical_ring_emit(ringbuf, 0);
+   intel_logical_ring_emit(ringbuf, ring-outstanding_lazy_seqno);
+   intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
+   intel_logical_ring_emit(ringbuf, MI_NOOP);
+   intel_logical_ring_advance_and_submit(ringbuf);
+
+   return 0;
+}
+
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 {
if (!intel_ring_initialized(ring))
@@ -426,6 +452,7 @@ static int logical_render_ring_init(struct drm_device *dev)
ring-cleanup = intel_fini_pipe_control;
ring-get_seqno = gen8_get_seqno;
ring-set_seqno = gen8_set_seqno;
+   ring-emit_request = gen8_emit_request;
 
return logical_ring_init(dev, ring);
 }
@@ -444,6 +471,7 @@ static int logical_bsd_ring_init(struct drm_device *dev)
ring-init = gen8_init_common_ring;
ring-get_seqno = gen8_get_seqno;
ring-set_seqno = gen8_set_seqno;
+   ring-emit_request = gen8_emit_request;
 
return logical_ring_init(dev, ring);
 }
@@ -462,6 +490,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
ring-init = gen8_init_common_ring;
ring-get_seqno = gen8_get_seqno;
ring-set_seqno = gen8_set_seqno;
+   ring-emit_request = gen8_emit_request;
 
return logical_ring_init(dev, ring);
 }
@@ -480,6 +509,7 @@ static int logical_blt_ring_init(struct drm_device *dev)
ring-init = gen8_init_common_ring;
ring-get_seqno = gen8_get_seqno;
ring-set_seqno = gen8_set_seqno;
+   ring-emit_request = gen8_emit_request;
 
return logical_ring_init(dev, ring);
 }
@@ -498,6 +528,7 @@ static int logical_vebox_ring_init(struct drm_device *dev)
ring-init = gen8_init_common_ring;
ring-get_seqno = gen8_get_seqno;
ring-set_seqno = gen8_set_seqno;
+   ring-emit_request = gen8_emit_request;
 
return logical_ring_init(dev, ring);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c305df0..176ee6a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -215,6 +215,9 @@ struct  intel_engine_cs {
  unsigned int num_dwords);
} semaphore;
 
+   /* Execlists */
+   int (*emit_request)(struct intel_ringbuffer *ringbuf);
+
/**
 * List of objects currently involved in rendering from the
 * ringbuffer.
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 34/43] drm/i915/bdw: Make sure gpu reset still works with Execlists

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

If we reset a ring after a hang, we have to make sure that we clear
out all queued Execlists requests.

v2: The ring is, at this point, already being correctly re-programmed
for Execlists, and the hangcheck counters cleared.

v3: Daniel suggests to drop the if (execlists) because the Execlists
queue should be empty in legacy mode (which is true, if we do the
INIT_LIST_HEAD).

v4: Do the pending intel_runtime_pm_put

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c |   12 
 drivers/gpu/drm/i915/intel_ringbuffer.c |1 +
 2 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1c83b9c..143cff7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2567,6 +2567,18 @@ static void i915_gem_reset_ring_cleanup(struct 
drm_i915_private *dev_priv,
i915_gem_free_request(request);
}
 
+   while (!list_empty(ring-execlist_queue)) {
+   struct intel_ctx_submit_request *submit_req;
+
+   submit_req = list_first_entry(ring-execlist_queue,
+   struct intel_ctx_submit_request,
+   execlist_link);
+   list_del(submit_req-execlist_link);
+   intel_runtime_pm_put(dev_priv);
+   i915_gem_context_unreference(submit_req-ctx);
+   kfree(submit_req);
+   }
+
/* These may not have been flush before the reset, do so now */
kfree(ring-preallocated_lazy_request);
ring-preallocated_lazy_request = NULL;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 3188403..6e604c9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1587,6 +1587,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
ring-dev = dev;
INIT_LIST_HEAD(ring-active_list);
INIT_LIST_HEAD(ring-request_list);
+   INIT_LIST_HEAD(ring-execlist_queue);
ringbuf-size = 32 * PAGE_SIZE;
ringbuf-ring = ring;
ringbuf-ctx = ring-default_context;
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 37/43] drm/i915/bdw: Display execlists info in debugfs

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

v2: Warn and return if LRCs are not enabled.

v3: Grab the Execlists spinlock (noticed by Daniel Vetter).

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c |   73 +++
 drivers/gpu/drm/i915/intel_lrc.c|6 ---
 drivers/gpu/drm/i915/intel_lrc.h|7 
 3 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index fc39610..903ed67 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1674,6 +1674,78 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
return 0;
 }
 
+static int i915_execlists(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = (struct drm_info_node *) m-private;
+   struct drm_device *dev = node-minor-dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_engine_cs *ring;
+   u32 status_pointer;
+   u8 read_pointer;
+   u8 write_pointer;
+   u32 status;
+   u32 ctx_id;
+   struct list_head *cursor;
+   int ring_id, i;
+
+   if (!i915.enable_execlists) {
+   seq_printf(m, Logical Ring Contexts are disabled\n);
+   return 0;
+   }
+
+   for_each_ring(ring, dev_priv, ring_id) {
+   struct intel_ctx_submit_request *head_req = NULL;
+   int count = 0;
+   unsigned long flags;
+
+   seq_printf(m, %s\n, ring-name);
+
+   status = I915_READ(RING_EXECLIST_STATUS(ring));
+   ctx_id = I915_READ(RING_EXECLIST_STATUS(ring) + 4);
+   seq_printf(m, \tExeclist status: 0x%08X, context: %u\n,
+   status, ctx_id);
+
+   status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
+   seq_printf(m, \tStatus pointer: 0x%08X\n, status_pointer);
+
+   read_pointer = ring-next_context_status_buffer;
+   write_pointer = status_pointer  0x07;
+   if (read_pointer  write_pointer)
+   write_pointer += 6;
+   seq_printf(m, \tRead pointer: 0x%08X, write pointer 0x%08X\n,
+   read_pointer, write_pointer);
+
+   for (i = 0; i  6; i++) {
+   status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + 8*i);
+   ctx_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + 8*i 
+ 4);
+
+   seq_printf(m, \tStatus buffer %d: 0x%08X, context: 
%u\n,
+   i, status, ctx_id);
+   }
+
+   spin_lock_irqsave(ring-execlist_lock, flags);
+   list_for_each(cursor, ring-execlist_queue)
+   count++;
+   head_req = list_first_entry_or_null(ring-execlist_queue,
+   struct intel_ctx_submit_request, execlist_link);
+   spin_unlock_irqrestore(ring-execlist_lock, flags);
+
+   seq_printf(m, \t%d requests in queue\n, count);
+   if (head_req) {
+   struct drm_i915_gem_object *ctx_obj;
+
+   ctx_obj = head_req-ctx-engine[ring_id].state;
+   seq_printf(m, \tHead request id: %u\n,
+   intel_execlists_ctx_id(ctx_obj));
+   seq_printf(m, \tHead request tail: %u\n, 
head_req-tail);
+   }
+
+   seq_putc(m, '\n');
+   }
+
+   return 0;
+}
+
 static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data)
 {
struct drm_info_node *node = m-private;
@@ -3899,6 +3971,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{i915_opregion, i915_opregion, 0},
{i915_gem_framebuffer, i915_gem_framebuffer_info, 0},
{i915_context_status, i915_context_status, 0},
+   {i915_execlists, i915_execlists, 0},
{i915_gen6_forcewake_count, i915_gen6_forcewake_count_info, 0},
{i915_swizzle_info, i915_swizzle_info, 0},
{i915_ppgtt_info, i915_ppgtt_info, 0},
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 829b15d..8056fa4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -46,12 +46,6 @@
 
 #define GEN8_LR_CONTEXT_ALIGN 4096
 
-#define RING_ELSP(ring)((ring)-mmio_base+0x230)
-#define RING_EXECLIST_STATUS(ring) ((ring)-mmio_base+0x234)
-#define RING_CONTEXT_CONTROL(ring) ((ring)-mmio_base+0x244)
-#define RING_CONTEXT_STATUS_BUF(ring)  ((ring)-mmio_base+0x370)
-#define RING_CONTEXT_STATUS_PTR(ring)  ((ring)-mmio_base+0x3a0)
-
 #define RING_EXECLIST_QFULL(1  0x2)
 #define RING_EXECLIST1_VALID   (1  0x3)
 #define RING_EXECLIST0_VALID   (1  0x4)
diff --git a/drivers/gpu/drm/i915/intel_lrc.h 

[Intel-gfx] [PATCH 24/43] drm/i915/bdw: GEN-specific logical ring emit batchbuffer start

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Dispatch_execbuffer's evil twin.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c|   28 
 drivers/gpu/drm/i915/intel_ringbuffer.h |2 ++
 2 files changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a6dcb3a..55ee8dd 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -384,6 +384,29 @@ static int gen8_init_render_ring(struct intel_engine_cs 
*ring)
return ret;
 }
 
+static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
+ u64 offset, unsigned flags)
+{
+   struct intel_engine_cs *ring = ringbuf-ring;
+   struct drm_i915_private *dev_priv = ring-dev-dev_private;
+   bool ppgtt = dev_priv-mm.aliasing_ppgtt != NULL 
+   !(flags  I915_DISPATCH_SECURE);
+   int ret;
+
+   ret = intel_logical_ring_begin(ringbuf, 4);
+   if (ret)
+   return ret;
+
+   /* FIXME(BDW): Address space and security selectors. */
+   intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | 
(ppgtt8));
+   intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
+   intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
+   intel_logical_ring_emit(ringbuf, MI_NOOP);
+   intel_logical_ring_advance(ringbuf);
+
+   return 0;
+}
+
 static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring)
 {
struct drm_device *dev = ring-dev;
@@ -615,6 +638,7 @@ static int logical_render_ring_init(struct drm_device *dev)
ring-emit_flush = gen8_emit_flush_render;
ring-irq_get = gen8_logical_ring_get_irq;
ring-irq_put = gen8_logical_ring_put_irq;
+   ring-emit_bb_start = gen8_emit_bb_start;
 
return logical_ring_init(dev, ring);
 }
@@ -639,6 +663,7 @@ static int logical_bsd_ring_init(struct drm_device *dev)
ring-emit_flush = gen8_emit_flush;
ring-irq_get = gen8_logical_ring_get_irq;
ring-irq_put = gen8_logical_ring_put_irq;
+   ring-emit_bb_start = gen8_emit_bb_start;
 
return logical_ring_init(dev, ring);
 }
@@ -663,6 +688,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
ring-emit_flush = gen8_emit_flush;
ring-irq_get = gen8_logical_ring_get_irq;
ring-irq_put = gen8_logical_ring_put_irq;
+   ring-emit_bb_start = gen8_emit_bb_start;
 
return logical_ring_init(dev, ring);
 }
@@ -687,6 +713,7 @@ static int logical_blt_ring_init(struct drm_device *dev)
ring-emit_flush = gen8_emit_flush;
ring-irq_get = gen8_logical_ring_get_irq;
ring-irq_put = gen8_logical_ring_put_irq;
+   ring-emit_bb_start = gen8_emit_bb_start;
 
return logical_ring_init(dev, ring);
 }
@@ -711,6 +738,7 @@ static int logical_vebox_ring_init(struct drm_device *dev)
ring-emit_flush = gen8_emit_flush;
ring-irq_get = gen8_logical_ring_get_irq;
ring-irq_put = gen8_logical_ring_put_irq;
+   ring-emit_bb_start = gen8_emit_bb_start;
 
return logical_ring_init(dev, ring);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 09102b2..c885d5c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -228,6 +228,8 @@ struct  intel_engine_cs {
int (*emit_flush)(struct intel_ringbuffer *ringbuf,
  u32 invalidate_domains,
  u32 flush_domains);
+   int (*emit_bb_start)(struct intel_ringbuffer *ringbuf,
+u64 offset, unsigned flags);
 
/**
 * List of objects currently involved in rendering from the
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 02/43] drm/i915/bdw: New source and header file for LRs, LRCs and Execlists

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Some legacy HW context code assumptions don't make sense for this new
submission method, so we will place this stuff in a separate file.

Note for reviewers: I've carefully considered the best name for this file
and this was my best option (other possibilities were intel_lr_context.c
or intel_execlist.c). I am open to a certain bikeshedding on this matter,
anyway.

And some point in time, it would be a good idea to split intel_lrc.c/.h
even further, but for the moment just shove everything together.

v2: Change to intel_lrc.c

v3: Squash together with the header file addition

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/Makefile|1 +
 drivers/gpu/drm/i915/i915_drv.h  |1 +
 drivers/gpu/drm/i915/intel_lrc.c |   42 ++
 drivers/gpu/drm/i915/intel_lrc.h |   27 
 4 files changed, 71 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/intel_lrc.c
 create mode 100644 drivers/gpu/drm/i915/intel_lrc.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index cad1683..9fee2a0 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -31,6 +31,7 @@ i915-y += i915_cmd_parser.o \
  i915_gpu_error.o \
  i915_irq.o \
  i915_trace_points.o \
+ intel_lrc.o \
  intel_ringbuffer.o \
  intel_uncore.o
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 44a63f3..54c2bd9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -35,6 +35,7 @@
 #include i915_reg.h
 #include intel_bios.h
 #include intel_ringbuffer.h
+#include intel_lrc.h
 #include i915_gem_gtt.h
 #include linux/io-mapping.h
 #include linux/i2c.h
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
new file mode 100644
index 000..49bb6fc
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Ben Widawsky b...@bwidawsk.net
+ *Michel Thierry michel.thie...@intel.com
+ *Thomas Daniel thomas.dan...@intel.com
+ *Oscar Mateo oscar.ma...@intel.com
+ *
+ */
+
+/*
+ * GEN8 brings an expansion of the HW contexts: Logical Ring Contexts.
+ * These expanded contexts enable a number of new abilities, especially
+ * Execlists (also implemented in this file).
+ *
+ * Execlists are the new method by which, on gen8+ hardware, workloads are
+ * submitted for execution (as opposed to the legacy, ringbuffer-based, 
method).
+ */
+
+#include drm/drmP.h
+#include drm/i915_drm.h
+#include i915_drv.h
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
new file mode 100644
index 000..f6830a4
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

[Intel-gfx] [PATCH 06/43] drm/i915/bdw: A bit more advanced LR context alloc/free

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Now that we have the ability to allocate our own context backing objects
and we have multiplexed one of them per engine inside the context structs,
we can finally allocate and free them correctly.

Regarding the context size, reading the register to calculate the sizes
can work, I think, however the docs are very clear about the actual
context sizes on GEN8, so just hardcode that and use it.

v2: Rebased on top of the Full PPGTT series. It is important to notice
that at this point we have one global default context per engine, all
of them using the aliasing PPGTT (as opposed to the single global
default context we have with legacy HW contexts).

v3:
- Go back to one single global default context, this time with multiple
  backing objects inside.
- Use different context sizes for non-render engines, as suggested by
  Damien (still hardcoded, since the information about the context size
  registers in the BSpec is, well, *lacking*).
- Render ctx size is 20 (or 19) pages, but not 21 (caught by Damien).
- Move default context backing object creation to intel_init_ring (so
  that we don't waste memory in rings that might not get initialized).

v4:
- Reuse the HW legacy context init/fini.
- Create a separate free function.
- Rename the functions with an intel_ prefix.

v5: Several rebases to account for the changes in the previous patches.

Signed-off-by: Ben Widawsky b...@bwidawsk.net (v1)
Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h |2 ++
 drivers/gpu/drm/i915/i915_gem_context.c |2 +-
 drivers/gpu/drm/i915/intel_lrc.c|   59 +--
 3 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b2b0c80..f2a6598 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2500,6 +2500,8 @@ int i915_switch_context(struct intel_engine_cs *ring,
 struct intel_context *
 i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
 void i915_gem_context_free(struct kref *ctx_ref);
+struct drm_i915_gem_object *
+i915_gem_alloc_context_obj(struct drm_device *dev, size_t size);
 static inline void i915_gem_context_reference(struct intel_context *ctx)
 {
kref_get(ctx-ref);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 718150e..48d7476 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -201,7 +201,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
kfree(ctx);
 }
 
-static struct drm_i915_gem_object *
+struct drm_i915_gem_object *
 i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8cc6b55..a3fc6fc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -41,6 +41,11 @@
 #include drm/i915_drm.h
 #include i915_drv.h
 
+#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
+#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
+
+#define GEN8_LR_CONTEXT_ALIGN 4096
+
 int intel_sanitize_enable_execlists(struct drm_device *dev, int 
enable_execlists)
 {
if (enable_execlists == 0)
@@ -54,15 +59,65 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, 
int enable_execlists
 
 void intel_lr_context_free(struct intel_context *ctx)
 {
-   /* TODO */
+   int i;
+
+   for (i = 0; i  I915_NUM_RINGS; i++) {
+   struct drm_i915_gem_object *ctx_obj = ctx-engine[i].state;
+   if (ctx_obj) {
+   i915_gem_object_ggtt_unpin(ctx_obj);
+   drm_gem_object_unreference(ctx_obj-base);
+   }
+   }
+}
+
+static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
+{
+   int ret = 0;
+
+   WARN_ON(INTEL_INFO(ring-dev)-gen != 8);
+
+   switch (ring-id) {
+   case RCS:
+   ret = GEN8_LR_CONTEXT_RENDER_SIZE;
+   break;
+   case VCS:
+   case BCS:
+   case VECS:
+   case VCS2:
+   ret = GEN8_LR_CONTEXT_OTHER_SIZE;
+   break;
+   }
+
+   return ret;
 }
 
 int intel_lr_context_deferred_create(struct intel_context *ctx,
 struct intel_engine_cs *ring)
 {
+   struct drm_device *dev = ring-dev;
+   struct drm_i915_gem_object *ctx_obj;
+   uint32_t context_size;
+   int ret;
+
BUG_ON(ctx-legacy_hw_ctx.rcs_state != NULL);
 
-   /* TODO */
+   context_size = round_up(get_lr_context_size(ring), 4096);
+
+   ctx_obj = i915_gem_alloc_context_obj(dev, context_size);
+   if (IS_ERR(ctx_obj)) {
+   ret = PTR_ERR(ctx_obj);
+   DRM_DEBUG_DRIVER(Alloc LRC backing obj failed: %d\n, ret);
+   return ret;
+   }
+
+   ret = 

[Intel-gfx] [PATCH 36/43] drm/i915/bdw: Disable semaphores for Execlists

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Up until recently, semaphores weren't enabled in BDW so we didn't care
about them. But then Rodrigo came and enabled them:

   commit 521e62e49a42661a4ee0102644517dbe2f100a23
   Author: Rodrigo Vivi rodrigo.v...@intel.com

  drm/i915: Enable semaphores on BDW

So now we have to explicitly disable them for Execlists until both
features play nicely.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.c |4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5e4fefd..3489102 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -477,6 +477,10 @@ bool i915_semaphore_is_enabled(struct drm_device *dev)
if (i915.semaphores = 0)
return i915.semaphores;
 
+   /* TODO: make semaphores and Execlists play nicely together */
+   if (i915.enable_execlists)
+   return false;
+
 #ifdef CONFIG_INTEL_IOMMU
/* Enable semaphores on SNB when IO remapping is off */
if (INTEL_INFO(dev)-gen == 6  intel_iommu_gfx_mapped)
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 32/43] drm/i915/bdw: Avoid non-lite-restore preemptions

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

In the current Execlists feeding mechanism, full preemption is not
supported yet: only lite-restores are allowed (this is: the GPU
simply samples a new tail pointer for the context currently in
execution).

But we have identified a scenario in which a full preemption occurs:
1) We submit two contexts for execution (A  B).
2) The GPU finishes with the first one (A), switches to the second one
(B) and informs us.
3) We submit B again (hoping to cause a lite restore) together with C,
but in the time we spend writing to the ELSP, the GPU finishes B.
4) The GPU start executing B again (since we told it so).
5) We receive a B finished interrupt and, mistakenly, we submit C (again)
and D, causing a full preemption of B.

The race is avoided by keeping track of how many times a context has been
submitted to the hardware and by better discriminating the received context
switch interrupts: in the example, when we have submitted B twice, we won't
submit C and D as soon as we receive the notification that B is completed
because we were expecting to get a LITE_RESTORE and we didn't, so we know a
second completion will be received shortly.

Without this explicit checking, somehow, the batch buffer execution order
gets messed with. This can be verified with the IGT test I sent together with
the series. I don't know the exact mechanism by which the pre-emption messes
with the execution order but, since other people are working on the Scheduler
+ Preemption on Execlists, I didn't try to fix it. In these series, only Lite
Restores are supported (other kinds of preemption WARN).

v2: elsp_submitted belongs in the new intel_ctx_submit_request. Several
rebase changes.

v3: Clarify how the race is avoided, as requested by Daniel.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c |   28 
 drivers/gpu/drm/i915/intel_lrc.h |2 ++
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 65f4f26..895dbfc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -264,6 +264,7 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
else if (req0-ctx == cursor-ctx) {
/* Same ctx: ignore first request, as second request
 * will update tail past first request's workload */
+   cursor-elsp_submitted = req0-elsp_submitted;
list_del(req0-execlist_link);
queue_work(dev_priv-wq, req0-work);
req0 = cursor;
@@ -273,8 +274,14 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
}
}
 
+   WARN_ON(req1  req1-elsp_submitted);
+
BUG_ON(execlists_submit_context(ring, req0-ctx, req0-tail,
req1? req1-ctx : NULL, req1? req1-tail : 0));
+
+   req0-elsp_submitted++;
+   if (req1)
+   req1-elsp_submitted++;
 }
 
 static bool execlists_check_remove_request(struct intel_engine_cs *ring,
@@ -291,9 +298,13 @@ static bool execlists_check_remove_request(struct 
intel_engine_cs *ring,
struct drm_i915_gem_object *ctx_obj =
head_req-ctx-engine[ring-id].state;
if (intel_execlists_ctx_id(ctx_obj) == request_id) {
-   list_del(head_req-execlist_link);
-   queue_work(dev_priv-wq, head_req-work);
-   return true;
+   WARN(head_req-elsp_submitted == 0,
+   Never submitted head request\n);
+   if (--head_req-elsp_submitted = 0) {
+   list_del(head_req-execlist_link);
+   queue_work(dev_priv-wq, head_req-work);
+   return true;
+   }
}
}
 
@@ -326,7 +337,16 @@ void intel_execlists_handle_ctx_events(struct 
intel_engine_cs *ring)
status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
(read_pointer % 6) * 8 + 4);
 
-   if (status  GEN8_CTX_STATUS_COMPLETE) {
+   if (status  GEN8_CTX_STATUS_PREEMPTED) {
+   if (status  GEN8_CTX_STATUS_LITE_RESTORE) {
+   if (execlists_check_remove_request(ring, 
status_id))
+   WARN(1, Lite Restored request removed 
from queue\n);
+   } else
+   WARN(1, Preemption without Lite Restore\n);
+   }
+
+if ((status  GEN8_CTX_STATUS_ACTIVE_IDLE) ||
+(status  GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
if (execlists_check_remove_request(ring, status_id))
  

[Intel-gfx] [PATCH 18/43] drm/i915/bdw: New logical ring submission mechanism

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Well, new-ish: if all this code looks familiar, that's because it's
a clone of the existing submission mechanism (with some modifications
here and there to adapt it to LRCs and Execlists).

And why did we do this instead of reusing code, one might wonder?
Well, there are some fears that the differences are big enough that
they will end up breaking all platforms.

Also, Execlists offer several advantages, like control over when the
GPU is done with a given workload, that can help simplify the
submission mechanism, no doubt. I am interested in getting Execlists
to work first and foremost, but in the future this parallel submission
mechanism will help us to fine tune the mechanism without affecting
old gens.

v2: Pass the ringbuffer only (whenever possible).

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c|  193 +++
 drivers/gpu/drm/i915/intel_lrc.h|   12 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c |   20 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |3 +
 4 files changed, 218 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f171fd5..bd37d51 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -106,6 +106,199 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
/* TODO */
 }
 
+void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
+{
+   intel_logical_ring_advance(ringbuf);
+
+   if (intel_ring_stopped(ringbuf-ring))
+   return;
+
+   /* TODO: how to submit a context to the ELSP is not here yet */
+}
+
+static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
+{
+   if (ring-outstanding_lazy_seqno)
+   return 0;
+
+   if (ring-preallocated_lazy_request == NULL) {
+   struct drm_i915_gem_request *request;
+
+   request = kmalloc(sizeof(*request), GFP_KERNEL);
+   if (request == NULL)
+   return -ENOMEM;
+
+   ring-preallocated_lazy_request = request;
+   }
+
+   return i915_gem_get_seqno(ring-dev, ring-outstanding_lazy_seqno);
+}
+
+static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, int 
bytes)
+{
+   struct intel_engine_cs *ring = ringbuf-ring;
+   struct drm_i915_gem_request *request;
+   u32 seqno = 0;
+   int ret;
+
+   if (ringbuf-last_retired_head != -1) {
+   ringbuf-head = ringbuf-last_retired_head;
+   ringbuf-last_retired_head = -1;
+
+   ringbuf-space = intel_ring_space(ringbuf);
+   if (ringbuf-space = bytes)
+   return 0;
+   }
+
+   list_for_each_entry(request, ring-request_list, list) {
+   if (__intel_ring_space(request-tail, ringbuf-tail,
+   ringbuf-size) = bytes) {
+   seqno = request-seqno;
+   break;
+   }
+   }
+
+   if (seqno == 0)
+   return -ENOSPC;
+
+   ret = i915_wait_seqno(ring, seqno);
+   if (ret)
+   return ret;
+
+   /* TODO: make sure we update the right ringbuffer's last_retired_head
+* when retiring requests */
+   i915_gem_retire_requests_ring(ring);
+   ringbuf-head = ringbuf-last_retired_head;
+   ringbuf-last_retired_head = -1;
+
+   ringbuf-space = intel_ring_space(ringbuf);
+   return 0;
+}
+
+static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, int 
bytes)
+{
+   struct intel_engine_cs *ring = ringbuf-ring;
+   struct drm_device *dev = ring-dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   unsigned long end;
+   int ret;
+
+   ret = logical_ring_wait_request(ringbuf, bytes);
+   if (ret != -ENOSPC)
+   return ret;
+
+   /* Force the context submission in case we have been skipping it */
+   intel_logical_ring_advance_and_submit(ringbuf);
+
+   /* With GEM the hangcheck timer should kick us out of the loop,
+* leaving it early runs the risk of corrupting GEM state (due
+* to running on almost untested codepaths). But on resume
+* timers don't work yet, so prevent a complete hang in that
+* case by choosing an insanely large timeout. */
+   end = jiffies + 60 * HZ;
+
+   do {
+   ringbuf-head = I915_READ_HEAD(ring);
+   ringbuf-space = intel_ring_space(ringbuf);
+   if (ringbuf-space = bytes) {
+   ret = 0;
+   break;
+   }
+
+   if (!drm_core_check_feature(dev, DRIVER_MODESET) 
+   dev-primary-master) {
+   struct drm_i915_master_private *master_priv = 
dev-primary-master-driver_priv;
+   if (master_priv-sarea_priv)
+

[Intel-gfx] [PATCH 42/43] drm/i915/bdw: Pin the context backing objects to GGTT on-demand

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Up until now, we have pinned every logical ring context backing object
during creation, and left it pinned until destruction. This made my life
easier, but it's a harmful thing to do, because we cause fragmentation
of the GGTT (and, eventually, we would run out of space).

This patch makes the pinning on-demand: the backing objects of the two
contexts that are written to the ELSP are pinned right before submission
and unpinned once the hardware is done with them. The only context that
is still pinned regardless is the global default one, so that the HWS can
still be accessed in the same way (ring-status_page).

v2: In the early version of this patch, we were pinning the context as
we put it into the ELSP: on the one hand, this is very efficient because
only a maximum two contexts are pinned at any given time, but on the other
hand, we cannot really pin in interrupt time :(

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c |   11 +++--
 drivers/gpu/drm/i915/i915_drv.h |1 +
 drivers/gpu/drm/i915/i915_gem.c |   44 ---
 drivers/gpu/drm/i915/intel_lrc.c|   42 -
 drivers/gpu/drm/i915/intel_lrc.h|2 ++
 5 files changed, 73 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 968c3c0..84531cc 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1721,10 +1721,15 @@ static int i915_dump_lrc(struct seq_file *m, void 
*unused)
continue;
 
if (ctx_obj) {
-   struct page *page = 
i915_gem_object_get_page(ctx_obj, 1);
-   uint32_t *reg_state = kmap_atomic(page);
+   struct page *page ;
+   uint32_t *reg_state;
int j;
 
+   i915_gem_obj_ggtt_pin(ctx_obj, 
GEN8_LR_CONTEXT_ALIGN, 0);
+
+   page = i915_gem_object_get_page(ctx_obj, 1);
+   reg_state = kmap_atomic(page);
+
seq_printf(m, CONTEXT: %s %u\n, ring-name,

intel_execlists_ctx_id(ctx_obj));
 
@@ -1736,6 +1741,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
}
kunmap_atomic(reg_state);
 
+   i915_gem_object_ggtt_unpin(ctx_obj);
+
seq_putc(m, '\n');
}
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1ce51d6..70466af 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -628,6 +628,7 @@ struct intel_context {
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
+   atomic_t unpin_count;
} engine[I915_NUM_RINGS];
 
struct list_head link;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 143cff7..42faaa3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2491,12 +2491,23 @@ static void i915_set_reset_status(struct 
drm_i915_private *dev_priv,
 
 static void i915_gem_free_request(struct drm_i915_gem_request *request)
 {
+   struct intel_context *ctx = request-ctx;
+
list_del(request-list);
i915_gem_request_remove_from_client(request);
 
-   if (request-ctx)
-   i915_gem_context_unreference(request-ctx);
+   if (ctx) {
+   struct intel_engine_cs *ring = request-ring;
+   struct drm_i915_gem_object *ctx_obj = 
ctx-engine[ring-id].state;
+   atomic_t *unpin_count = ctx-engine[ring-id].unpin_count;
 
+   if (ctx_obj) {
+   if (atomic_dec_return(unpin_count) == 0 
+   ctx != ring-default_context)
+   i915_gem_object_ggtt_unpin(ctx_obj);
+   }
+   i915_gem_context_unreference(ctx);
+   }
kfree(request);
 }
 
@@ -2551,6 +2562,23 @@ static void i915_gem_reset_ring_cleanup(struct 
drm_i915_private *dev_priv,
}
 
/*
+* Clear the execlists queue up before freeing the requests, as those
+* are the ones that keep the context and ringbuffer backing objects
+* pinned in place.
+*/
+   while (!list_empty(ring-execlist_queue)) {
+   struct intel_ctx_submit_request *submit_req;
+
+   submit_req = list_first_entry(ring-execlist_queue,
+   struct intel_ctx_submit_request,
+   execlist_link);
+   

[Intel-gfx] [PATCH 27/43] drm/i915/bdw: Render state init for Execlists

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

The batchbuffer that sets the render context state is submitted
in a different way, and from different places.

We needed to make both the render state preparation and free functions
outside accesible, and namespace accordingly. This mess is so that all
LR, LRC and Execlists functionality can go together in intel_lrc.c: we
can fix all of this later on, once the interfaces are clear.

v2: Create a separate ctx-rcs_initialized for the Execlists case, as
suggested by Chris Wilson.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h  |4 +--
 drivers/gpu/drm/i915/i915_gem_context.c  |   17 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c |   40 ++
 drivers/gpu/drm/i915/i915_gem_render_state.h |   47 ++
 drivers/gpu/drm/i915/intel_lrc.c |   46 +
 drivers/gpu/drm/i915/intel_lrc.h |2 ++
 drivers/gpu/drm/i915/intel_renderstate.h |8 +
 7 files changed, 139 insertions(+), 25 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_render_state.h

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4303e2c..b7cf0ec 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -37,6 +37,7 @@
 #include intel_ringbuffer.h
 #include intel_lrc.h
 #include i915_gem_gtt.h
+#include i915_gem_render_state.h
 #include linux/io-mapping.h
 #include linux/i2c.h
 #include linux/i2c-algo-bit.h
@@ -623,6 +624,7 @@ struct intel_context {
} legacy_hw_ctx;
 
/* Execlists */
+   bool rcs_initialized;
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
@@ -2553,8 +2555,6 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, 
void *data,
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
   struct drm_file *file);
 
-/* i915_gem_render_state.c */
-int i915_gem_render_state_init(struct intel_engine_cs *ring);
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct drm_device *dev,
  struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 9085ff1..0dc6992 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -513,8 +513,23 @@ int i915_gem_context_enable(struct drm_i915_private 
*dev_priv)
ppgtt-enable(ppgtt);
}
 
-   if (i915.enable_execlists)
+   if (i915.enable_execlists) {
+   struct intel_context *dctx;
+
+   ring = dev_priv-ring[RCS];
+   dctx = ring-default_context;
+
+   if (!dctx-rcs_initialized) {
+   ret = intel_lr_context_render_state_init(ring, dctx);
+   if (ret) {
+				DRM_ERROR("Init render state failed: %d\n", ret);
+   return ret;
+   }
+   dctx-rcs_initialized = true;
+   }
+
return 0;
+   }
 
/* FIXME: We should make this work, even in reset */
if (i915_reset_in_progress(dev_priv-gpu_error))
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index e60be3f..a9a62d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -28,13 +28,6 @@
 #include i915_drv.h
 #include intel_renderstate.h
 
-struct render_state {
-   const struct intel_renderstate_rodata *rodata;
-   struct drm_i915_gem_object *obj;
-   u64 ggtt_offset;
-   int gen;
-};
-
 static const struct intel_renderstate_rodata *
 render_state_get_rodata(struct drm_device *dev, const int gen)
 {
@@ -127,30 +120,47 @@ static int render_state_setup(struct render_state *so)
return 0;
 }
 
-static void render_state_fini(struct render_state *so)
+void i915_gem_render_state_fini(struct render_state *so)
 {
i915_gem_object_ggtt_unpin(so-obj);
drm_gem_object_unreference(so-obj-base);
 }
 
-int i915_gem_render_state_init(struct intel_engine_cs *ring)
+int i915_gem_render_state_prepare(struct intel_engine_cs *ring,
+ struct render_state *so)
 {
-   struct render_state so;
int ret;
 
if (WARN_ON(ring-id != RCS))
return -ENOENT;
 
-   ret = render_state_init(so, ring-dev);
+   ret = render_state_init(so, ring-dev);
if (ret)
return ret;
 
-   if (so.rodata == NULL)
+   if (so-rodata == NULL)
return 0;
 
-   ret = render_state_setup(so);
+   ret = render_state_setup(so);
+   if (ret) {
+   i915_gem_render_state_fini(so);
+   

[Intel-gfx] [PATCH 20/43] drm/i915/bdw: GEN-specific logical ring emit flush

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Same as the legacy-style ring-flush.

v2: The BSD invalidate bit still exists in GEN8! Add it for the VCS
rings (but still consolidate the blt and bsd ring flushes into one).
This was noticed by Brad Volkin.

v3: The command for BSD and for other rings is slightly different:
get it exactly the same as in gen6_ring_flush + gen6_bsd_ring_flush

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c|   82 +++
 drivers/gpu/drm/i915/intel_ringbuffer.c |7 ---
 drivers/gpu/drm/i915/intel_ringbuffer.h |   10 
 3 files changed, 92 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 64bda7a..5dd63d6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -342,6 +342,83 @@ static int gen8_init_render_ring(struct intel_engine_cs 
*ring)
return ret;
 }
 
+static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
+  u32 invalidate_domains,
+  u32 unused)
+{
+   struct intel_engine_cs *ring = ringbuf-ring;
+   struct drm_device *dev = ring-dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   uint32_t cmd;
+   int ret;
+
+   ret = intel_logical_ring_begin(ringbuf, 4);
+   if (ret)
+   return ret;
+
+   cmd = MI_FLUSH_DW + 1;
+
+   if (ring == dev_priv-ring[VCS]) {
+   if (invalidate_domains  I915_GEM_GPU_DOMAINS)
+   cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+   MI_FLUSH_DW_STORE_INDEX | 
MI_FLUSH_DW_OP_STOREDW;
+   } else {
+   if (invalidate_domains  I915_GEM_DOMAIN_RENDER)
+   cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+   MI_FLUSH_DW_OP_STOREDW;
+   }
+
+   intel_logical_ring_emit(ringbuf, cmd);
+   intel_logical_ring_emit(ringbuf, I915_GEM_HWS_SCRATCH_ADDR | 
MI_FLUSH_DW_USE_GTT);
+   intel_logical_ring_emit(ringbuf, 0); /* upper addr */
+   intel_logical_ring_emit(ringbuf, 0); /* value */
+   intel_logical_ring_advance(ringbuf);
+
+   return 0;
+}
+
+static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
+ u32 invalidate_domains,
+ u32 flush_domains)
+{
+   struct intel_engine_cs *ring = ringbuf-ring;
+   u32 scratch_addr = ring-scratch.gtt_offset + 2 * CACHELINE_BYTES;
+   u32 flags = 0;
+   int ret;
+
+   flags |= PIPE_CONTROL_CS_STALL;
+
+   if (flush_domains) {
+   flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+   flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+   }
+
+   if (invalidate_domains) {
+   flags |= PIPE_CONTROL_TLB_INVALIDATE;
+   flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+   flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+   flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+   flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+   flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+   flags |= PIPE_CONTROL_QW_WRITE;
+   flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+   }
+
+   ret = intel_logical_ring_begin(ringbuf, 6);
+   if (ret)
+   return ret;
+
+   intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
+   intel_logical_ring_emit(ringbuf, flags);
+   intel_logical_ring_emit(ringbuf, scratch_addr);
+   intel_logical_ring_emit(ringbuf, 0);
+   intel_logical_ring_emit(ringbuf, 0);
+   intel_logical_ring_emit(ringbuf, 0);
+   intel_logical_ring_advance(ringbuf);
+
+   return 0;
+}
+
 static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
 {
return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
@@ -453,6 +530,7 @@ static int logical_render_ring_init(struct drm_device *dev)
ring-get_seqno = gen8_get_seqno;
ring-set_seqno = gen8_set_seqno;
ring-emit_request = gen8_emit_request;
+   ring-emit_flush = gen8_emit_flush_render;
 
return logical_ring_init(dev, ring);
 }
@@ -472,6 +550,7 @@ static int logical_bsd_ring_init(struct drm_device *dev)
ring-get_seqno = gen8_get_seqno;
ring-set_seqno = gen8_set_seqno;
ring-emit_request = gen8_emit_request;
+   ring-emit_flush = gen8_emit_flush;
 
return logical_ring_init(dev, ring);
 }
@@ -491,6 +570,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
ring-get_seqno = gen8_get_seqno;
ring-set_seqno = gen8_set_seqno;
ring-emit_request = gen8_emit_request;
+   ring-emit_flush = gen8_emit_flush;
 
return logical_ring_init(dev, ring);
 }
@@ -510,6 +590,7 @@ static int logical_blt_ring_init(struct drm_device *dev)
ring-get_seqno = gen8_get_seqno;
  

[Intel-gfx] [PATCH 43/43] drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Same as with the context, pinning to GGTT regardless is harmful (it
badly fragments the GGTT and can even exhaust it).

Unfortunately, this case is also more complex than the previous one
because we need to map and access the ringbuffer in several places
along the execbuffer path (and we cannot make do by leaving the
default ringbuffer pinned, as before). Also, the context object
itself contains a pointer to the ringbuffer address that we have to
keep updated if we are going to allow the ringbuffer to move around.

v2: Same as with the context pinning, we cannot really do it during
an interrupt. Also, pin the default ringbuffers objects regardless
(makes error capture a lot easier).

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c |5 +-
 drivers/gpu/drm/i915/intel_lrc.c|   80 -
 drivers/gpu/drm/i915/intel_ringbuffer.c |   83 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |3 ++
 4 files changed, 111 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 42faaa3..1a852b9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2498,13 +2498,16 @@ static void i915_gem_free_request(struct 
drm_i915_gem_request *request)
 
if (ctx) {
struct intel_engine_cs *ring = request-ring;
+   struct intel_ringbuffer *ringbuf = 
ctx-engine[ring-id].ringbuf;
struct drm_i915_gem_object *ctx_obj = 
ctx-engine[ring-id].state;
atomic_t *unpin_count = ctx-engine[ring-id].unpin_count;
 
if (ctx_obj) {
 			if (atomic_dec_return(unpin_count) == 0 &&
-					ctx != ring->default_context)
+					ctx != ring->default_context) {
+   intel_unpin_ringbuffer_obj(ringbuf);
i915_gem_object_ggtt_unpin(ctx_obj);
+   }
}
i915_gem_context_unreference(ctx);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9fa8e35..4ca8278 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -315,7 +315,9 @@ static void execlists_elsp_write(struct intel_engine_cs 
*ring,
spin_unlock_irqrestore(dev_priv-uncore.lock, flags);
 }
 
-static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 
tail)
+static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
+   struct drm_i915_gem_object *ring_obj,
+   u32 tail)
 {
struct page *page;
uint32_t *reg_state;
@@ -324,6 +326,7 @@ static int execlists_ctx_write_tail(struct 
drm_i915_gem_object *ctx_obj, u32 tai
reg_state = kmap_atomic(page);
 
reg_state[CTX_RING_TAIL+1] = tail;
+   reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
 
kunmap_atomic(reg_state);
 
@@ -334,21 +337,25 @@ static int execlists_submit_context(struct 
intel_engine_cs *ring,
struct intel_context *to0, u32 tail0,
struct intel_context *to1, u32 tail1)
 {
-   struct drm_i915_gem_object *ctx_obj0;
+   struct drm_i915_gem_object *ctx_obj0 = to0-engine[ring-id].state;
+   struct intel_ringbuffer *ringbuf0 = to0-engine[ring-id].ringbuf;
struct drm_i915_gem_object *ctx_obj1 = NULL;
+   struct intel_ringbuffer *ringbuf1 = NULL;
 
-   ctx_obj0 = to0-engine[ring-id].state;
BUG_ON(!ctx_obj0);
BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0));
+   BUG_ON(!i915_gem_obj_is_pinned(ringbuf0-obj));
 
-   execlists_ctx_write_tail(ctx_obj0, tail0);
+   execlists_update_context(ctx_obj0, ringbuf0-obj, tail0);
 
if (to1) {
+   ringbuf1 = to1-engine[ring-id].ringbuf;
ctx_obj1 = to1-engine[ring-id].state;
BUG_ON(!ctx_obj1);
BUG_ON(!i915_gem_obj_is_pinned(ctx_obj1));
+   BUG_ON(!i915_gem_obj_is_pinned(ringbuf1-obj));
 
-   execlists_ctx_write_tail(ctx_obj1, tail1);
+   execlists_update_context(ctx_obj1, ringbuf1-obj, tail1);
}
 
execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
@@ -772,6 +779,7 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs 
*ring,
 
if (ring-preallocated_lazy_request == NULL) {
struct drm_i915_gem_request *request;
+   struct intel_ringbuffer *ringbuf = 
ctx-engine[ring-id].ringbuf;
struct drm_i915_gem_object *ctx_obj = 
ctx-engine[ring-id].state;
atomic_t *unpin_count = ctx-engine[ring-id].unpin_count;
 
@@ -787,6 +795,13 @@ static int logical_ring_alloc_seqno(struct 

[Intel-gfx] [PATCH 38/43] drm/i915/bdw: Display context backing obj ringbuffer info in debugfs

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c |   25 +++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 903ed67..0980cdd 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1629,6 +1629,12 @@ static int i915_gem_framebuffer_info(struct seq_file *m, 
void *data)
 
return 0;
 }
+static void describe_ctx_ringbuf(struct seq_file *m, struct intel_ringbuffer 
*ringbuf)
+{
+	seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)",
+			ringbuf->space, ringbuf->head, ringbuf->tail,
+			ringbuf->last_retired_head);
+}
 
 static int i915_context_status(struct seq_file *m, void *unused)
 {
@@ -1656,7 +1662,7 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
}
 
list_for_each_entry(ctx, dev_priv-context_list, link) {
-		if (ctx->legacy_hw_ctx.rcs_state == NULL)
+		if (!i915.enable_execlists && ctx->legacy_hw_ctx.rcs_state == NULL)
continue;
 
seq_puts(m, HW context );
@@ -1665,7 +1671,22 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
if (ring-default_context == ctx)
seq_printf(m, (default context %s) , 
ring-name);
 
-   describe_obj(m, ctx-legacy_hw_ctx.rcs_state);
+   if (i915.enable_execlists) {
+   seq_putc(m, '\n');
+   for_each_ring(ring, dev_priv, i) {
+   struct drm_i915_gem_object *ctx_obj = 
ctx-engine[i].state;
+   struct intel_ringbuffer *ringbuf = 
ctx-engine[i].ringbuf;
+
+				seq_printf(m, "%s: ", ring->name);
+   if (ctx_obj)
+   describe_obj(m, ctx_obj);
+   if (ringbuf)
+   describe_ctx_ringbuf(m, ringbuf);
+   seq_putc(m, '\n');
+   }
+   } else
+   describe_obj(m, ctx-legacy_hw_ctx.rcs_state);
+
seq_putc(m, '\n');
}
 
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 40/43] drm/i915/bdw: Document Logical Rings, LR contexts and Execlists

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Add theory of operation notes to intel_lrc.c and comments to externally
visible functions.

v2: Add notes on logical ring context creation.

v3: Use kerneldoc.

v4: Integrate it in the DocBook template.

Signed-off-by: Thomas Daniel thomas.dan...@intel.com (v1)
Signed-off-by: Oscar Mateo oscar.ma...@intel.com (v2, v3)
---
 Documentation/DocBook/drm.tmpl   |5 +
 drivers/gpu/drm/i915/intel_lrc.c |  215 +-
 drivers/gpu/drm/i915/intel_lrc.h |   30 ++
 3 files changed, 249 insertions(+), 1 deletion(-)

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 97838551..91a5620 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -3909,6 +3909,11 @@ int num_ioctls;/synopsis
 !Pdrivers/gpu/drm/i915/i915_cmd_parser.c batch buffer command parser
 !Idrivers/gpu/drm/i915/i915_cmd_parser.c
   /sect2
+  sect2
+titleLogical Rings, Logical Ring Contexts and Execlists/title
+!Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and 
Execlists
+!Idrivers/gpu/drm/i915/intel_lrc.c
+  /sect2
 /sect1
   /chapter
 /part
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8056fa4..5faa084 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -28,13 +28,108 @@
  *
  */
 
-/*
+/**
+ * DOC: Logical Rings, Logical Ring Contexts and Execlists
+ *
+ * Motivation:
  * GEN8 brings an expansion of the HW contexts: Logical Ring Contexts.
  * These expanded contexts enable a number of new abilities, especially
  * Execlists (also implemented in this file).
  *
+ * One of the main differences with the legacy HW contexts is that logical
+ * ring contexts incorporate many more things to the context's state, like
+ * PDPs or ringbuffer control registers:
+ *
+ * The reason why PDPs are included in the context is straightforward: as
+ * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
+ * contained there mean you don't need to do a ppgtt-switch_mm yourself,
+ * instead, the GPU will do it for you on the context switch.
+ *
+ * But, what about the ringbuffer control registers (head, tail, etc..)?
+ * shouldn't we just need a set of those per engine command streamer? This is
+ * where the name Logical Rings starts to make sense: by virtualizing the
+ * rings, the engine cs shifts to a new ring buffer with every context
+ * switch. When you want to submit a workload to the GPU you: A) choose your
+ * context, B) find its appropriate virtualized ring, C) write commands to it
+ * and then, finally, D) tell the GPU to switch to that context.
+ *
+ * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
+ * to a contexts is via a context execution list, ergo Execlists.
+ *
+ * LRC implementation:
+ * Regarding the creation of contexts, we have:
+ *
+ * - One global default context.
+ * - One local default context for each opened fd.
+ * - One local extra context for each context create ioctl call.
+ *
+ * Now that ringbuffers belong per-context (and not per-engine, like before)
+ * and that contexts are uniquely tied to a given engine (and not reusable,
+ * like before) we need:
+ *
+ * - One ringbuffer per-engine inside each context.
+ * - One backing object per-engine inside each context.
+ *
+ * The global default context starts its life with these new objects fully
+ * allocated and populated. The local default context for each opened fd is
+ * more complex, because we don't know at creation time which engine is going
+ * to use them. To handle this, we have implemented a deferred creation of LR
+ * contexts:
+ *
+ * The local context starts its life as a hollow or blank holder, that only
+ * gets populated for a given engine once we receive an execbuffer. If later
+ * on we receive another execbuffer ioctl for the same context but a different
+ * engine, we allocate/populate a new ringbuffer and context backing object and
+ * so on.
+ *
+ * Finally, regarding local contexts created using the ioctl call: as they are
+ * only allowed with the render ring, we can allocate  populate them right
+ * away (no need to defer anything, at least for now).
+ *
+ * Execlists implementation:
  * Execlists are the new method by which, on gen8+ hardware, workloads are
  * submitted for execution (as opposed to the legacy, ringbuffer-based, 
method).
+ * This method works as follows:
+ *
+ * When a request is committed, its commands (the BB start and any leading or
+ * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
+ * for the appropriate context. The tail pointer in the hardware context is not
+ * updated at this time, but instead, kept by the driver in the ringbuffer
+ * structure. A structure representing this request is added to a request queue
+ * for the appropriate engine: this structure contains a copy of the context's
+ * tail after

[Intel-gfx] [PATCH 33/43] drm/i915/bdw: Help out the ctx switch interrupt handler

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

If we receive a storm of requests for the same context (see gem_storedw_loop_*)
we might end up iterating over too many elements in interrupt time, looking for
contexts to squash together. Instead, share the burden by giving more
intelligence to the queue function. At most, the interrupt will iterate over
three elements.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c |   26 ++
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 895dbfc..829b15d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -384,9 +384,10 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
   struct intel_context *to,
   u32 tail)
 {
-   struct intel_ctx_submit_request *req = NULL;
+   struct drm_i915_private *dev_priv = ring-dev-dev_private;
+   struct intel_ctx_submit_request *req = NULL, *cursor;
unsigned long flags;
-   bool was_empty;
+   int num_elements = 0;
 
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (req == NULL)
@@ -400,9 +401,26 @@ static int execlists_context_queue(struct intel_engine_cs 
*ring,
 
spin_lock_irqsave(ring-execlist_lock, flags);
 
-   was_empty = list_empty(ring-execlist_queue);
+   list_for_each_entry(cursor, ring-execlist_queue, execlist_link)
+   if (++num_elements  2)
+   break;
+
+   if (num_elements  2) {
+   struct intel_ctx_submit_request *tail_req;
+
+   tail_req = list_last_entry(ring-execlist_queue,
+   struct intel_ctx_submit_request,
+   execlist_link);
+   if (to == tail_req-ctx) {
+			WARN(tail_req->elsp_submitted != 0,
+					"More than 2 already-submitted reqs queued\n");
+   list_del(tail_req-execlist_link);
+   queue_work(dev_priv-wq, tail_req-work);
+   }
+   }
+
list_add_tail(req-execlist_link, ring-execlist_queue);
-   if (was_empty)
+   if (num_elements == 0)
execlists_context_unqueue(ring);
 
spin_unlock_irqrestore(ring-execlist_lock, flags);
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 16/43] drm/i915/bdw: GEN-specific logical ring init

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

Logical rings do not need most of the initialization their
legacy ringbuffer counterparts do: we just need the pipe
control object for the render ring, enable Execlists on the
hardware and a few workarounds.

v2: Squash with: drm/i915: Extract pipe control fini  make
init outside accesible.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c|   54 +++
 drivers/gpu/drm/i915/intel_ringbuffer.c |   34 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h |3 ++
 3 files changed, 78 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 05b7069..7c8b75e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -106,6 +106,49 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
/* TODO */
 }
 
+static int gen8_init_common_ring(struct intel_engine_cs *ring)
+{
+   struct drm_device *dev = ring-dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+
+   I915_WRITE(RING_MODE_GEN7(ring),
+   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
+   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
+   POSTING_READ(RING_MODE_GEN7(ring));
+	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name);
+
+   memset(ring-hangcheck, 0, sizeof(ring-hangcheck));
+
+   return 0;
+}
+
+static int gen8_init_render_ring(struct intel_engine_cs *ring)
+{
+   struct drm_device *dev = ring-dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   int ret;
+
+   ret = gen8_init_common_ring(ring);
+   if (ret)
+   return ret;
+
+   /* We need to disable the AsyncFlip performance optimisations in order
+* to use MI_WAIT_FOR_EVENT within the CS. It should already be
+* programmed to '1' on all products.
+*
+* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
+*/
+   I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
+
+   ret = intel_init_pipe_control(ring);
+   if (ret)
+   return ret;
+
+   I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
+
+   return ret;
+}
+
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 {
if (!intel_ring_initialized(ring))
@@ -176,6 +219,9 @@ static int logical_render_ring_init(struct drm_device *dev)
ring-irq_enable_mask =
GT_RENDER_USER_INTERRUPT  GEN8_RCS_IRQ_SHIFT;
 
+   ring-init = gen8_init_render_ring;
+   ring-cleanup = intel_fini_pipe_control;
+
return logical_ring_init(dev, ring);
 }
 
@@ -190,6 +236,8 @@ static int logical_bsd_ring_init(struct drm_device *dev)
ring-irq_enable_mask =
GT_RENDER_USER_INTERRUPT  GEN8_VCS1_IRQ_SHIFT;
 
+   ring-init = gen8_init_common_ring;
+
return logical_ring_init(dev, ring);
 }
 
@@ -204,6 +252,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
ring-irq_enable_mask =
GT_RENDER_USER_INTERRUPT  GEN8_VCS2_IRQ_SHIFT;
 
+   ring-init = gen8_init_common_ring;
+
return logical_ring_init(dev, ring);
 }
 
@@ -218,6 +268,8 @@ static int logical_blt_ring_init(struct drm_device *dev)
ring-irq_enable_mask =
GT_RENDER_USER_INTERRUPT  GEN8_BCS_IRQ_SHIFT;
 
+   ring-init = gen8_init_common_ring;
+
return logical_ring_init(dev, ring);
 }
 
@@ -232,6 +284,8 @@ static int logical_vebox_ring_init(struct drm_device *dev)
ring-irq_enable_mask =
GT_RENDER_USER_INTERRUPT  GEN8_VECS_IRQ_SHIFT;
 
+   ring-init = gen8_init_common_ring;
+
return logical_ring_init(dev, ring);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 20eb1a4..ca45c58 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -573,8 +573,25 @@ out:
return ret;
 }
 
-static int
-init_pipe_control(struct intel_engine_cs *ring)
+void
+intel_fini_pipe_control(struct intel_engine_cs *ring)
+{
+   struct drm_device *dev = ring-dev;
+
+   if (ring-scratch.obj == NULL)
+   return;
+
+   if (INTEL_INFO(dev)-gen = 5) {
+   kunmap(sg_page(ring-scratch.obj-pages-sgl));
+   i915_gem_object_ggtt_unpin(ring-scratch.obj);
+   }
+
+   drm_gem_object_unreference(ring-scratch.obj-base);
+   ring-scratch.obj = NULL;
+}
+
+int
+intel_init_pipe_control(struct intel_engine_cs *ring)
 {
int ret;
 
@@ -649,7 +666,7 @@ static int init_render_ring(struct intel_engine_cs *ring)
   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
 
if (INTEL_INFO(dev)-gen = 5) {
-   ret = init_pipe_control(ring);
+   ret = intel_init_pipe_control(ring);
if (ret)
return ret;
}
@@ 

[Intel-gfx] [PATCH 13/43] drm/i915: Abstract the legacy workload submission mechanism away

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo oscar.ma...@intel.com

As suggested by Daniel Vetter. The idea, in subsequent patches, is to
provide an alternative to these vfuncs for the Execlists submission
mechanism.

v2: Splitted into two and reordered to illustrate our intentions, instead
of showing it off. Also, remove the add_request vfunc and added the
stop_ring one.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h|   24 
 drivers/gpu/drm/i915/i915_gem.c|   15 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   20 ++--
 3 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ff2c373..1caed52 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1617,6 +1617,21 @@ struct drm_i915_private {
/* Old ums support infrastructure, same warning applies. */
struct i915_ums_state ums;
 
+   /* Abstract the submission mechanism (legacy ringbuffer or execlists) 
away */
+   struct {
+   int (*do_execbuf) (struct drm_device *dev, struct drm_file 
*file,
+  struct intel_engine_cs *ring,
+  struct intel_context *ctx,
+  struct drm_i915_gem_execbuffer2 *args,
+  struct list_head *vmas,
+  struct drm_i915_gem_object *batch_obj,
+  u64 exec_start, u32 flags);
+   int (*init_rings) (struct drm_device *dev);
+   void (*cleanup_ring) (struct intel_engine_cs *ring);
+   void (*stop_ring) (struct intel_engine_cs *ring);
+   bool (*is_ring_initialized) (struct intel_engine_cs *ring);
+   } gt;
+
/*
 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 * will be rejected. Instead look for a better place.
@@ -2224,6 +2239,14 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, 
void *data,
  struct drm_file *file_priv);
 int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_priv);
+int i915_gem_ringbuffer_submission(struct drm_device *dev,
+  struct drm_file *file,
+  struct intel_engine_cs *ring,
+  struct intel_context *ctx,
+  struct drm_i915_gem_execbuffer2 *args,
+  struct list_head *vmas,
+  struct drm_i915_gem_object *batch_obj,
+  u64 exec_start, u32 flags);
 int i915_gem_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file_priv);
 int i915_gem_execbuffer2(struct drm_device *dev, void *data,
@@ -2376,6 +2399,7 @@ void i915_gem_reset(struct drm_device *dev);
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
+int i915_gem_init_rings(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d8bf4fa..6544286 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4518,7 +4518,7 @@ i915_gem_stop_ringbuffers(struct drm_device *dev)
int i;
 
for_each_ring(ring, dev_priv, i)
-   intel_stop_ring_buffer(ring);
+   dev_priv-gt.stop_ring(ring);
 }
 
 int
@@ -4635,7 +4635,7 @@ intel_enable_blt(struct drm_device *dev)
return true;
 }
 
-static int i915_gem_init_rings(struct drm_device *dev)
+int i915_gem_init_rings(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev-dev_private;
int ret;
@@ -4718,7 +4718,7 @@ i915_gem_init_hw(struct drm_device *dev)
 
i915_gem_init_swizzling(dev);
 
-   ret = i915_gem_init_rings(dev);
+   ret = dev_priv-gt.init_rings(dev);
if (ret)
return ret;
 
@@ -4759,6 +4759,13 @@ int i915_gem_init(struct drm_device *dev)
DRM_DEBUG_DRIVER(allow wake ack timed out\n);
}
 
+   if (!i915.enable_execlists) {
+   dev_priv-gt.do_execbuf = i915_gem_ringbuffer_submission;
+   dev_priv-gt.init_rings = i915_gem_init_rings;
+   dev_priv-gt.cleanup_ring = intel_cleanup_ring_buffer;
+   dev_priv-gt.stop_ring = intel_stop_ring_buffer;
+   }
+
i915_gem_init_userptr(dev);
i915_gem_init_global_gtt(dev);
 
@@ -4794,7 +4801,7 @@