Re: [Intel-gfx] [PATCH 2/2] drm/i915/gen8: Apply Per-context workarounds using W/A batch buffers

2015-03-02 Thread Siluvery, Arun

On 02/03/2015 10:10, Michel Thierry wrote:



On 25/02/15 17:54, Arun Siluvery wrote:

Some of the workarounds are to be applied during context save but before
restore and some at the end of context save/restore but before executing
the instructions in the ring. Workaround batch buffers are created for
this purpose as they cannot be applied using normal means. HW executes
them at specific stages during context save/restore.

In this method we initialize batch buffer with w/a commands and its address
is supplied using context offset pointers when a context is initialized.

This patch introduces indirect and per-context batch buffers using which
following workarounds are applied. These are required to fix issues
observed with preemption related workloads.

In Indirect context w/a batch buffer,
+WaDisableCtxRestoreArbitration
+WaFlushCoherentL3CacheLinesAtContextSwitch
+WaClearSlmSpaceAtContextSwitch

In Per context w/a batch buffer,
+WaDisableCtxRestoreArbitration
+WaRsRestoreWithPerCtxtBb

v2: Use GTT address type for all privileged instructions, update as
per dynamic pinning changes, minor simplifications, rename variables
as follows to keep lines under 80 chars and rebase.
s/indirect_ctx_wa_ringbuf/indirect_ctx_wa_bb
s/per_ctx_wa_ringbuf/per_ctx_wa_bb

v3: Modify WA BB initialization to Gen specific.

Change-Id: I0cedb536b7f6d9f10ba9e81ba625848e7bab603c
Signed-off-by: Rafael Barbalho 
Signed-off-by: Arun Siluvery 
---
   drivers/gpu/drm/i915/i915_drv.h |   3 +
   drivers/gpu/drm/i915/i915_reg.h |  30 +++-
   drivers/gpu/drm/i915/intel_lrc.c| 302 +++-
   drivers/gpu/drm/i915/intel_ringbuffer.h |   3 +
   4 files changed, 297 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d42040f..86cdb52 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -774,6 +774,9 @@ struct intel_context {

/* Execlists */
bool rcs_initialized;
+   struct intel_ringbuffer *indirect_ctx_wa_bb;
+   struct intel_ringbuffer *per_ctx_wa_bb;
+
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 55143cb..eb41d7f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -347,6 +347,26 @@
   #define   MI_INVALIDATE_BSD  (1<<7)
   #define   MI_FLUSH_DW_USE_GTT(1<<2)
   #define   MI_FLUSH_DW_USE_PPGTT  (0<<2)
+#define MI_ATOMIC(len) MI_INSTR(0x2F, (len-2))
+#define   MI_ATOMIC_MEMORY_TYPE_GGTT   (1<<22)
+#define   MI_ATOMIC_INLINE_DATA(1<<18)
+#define   MI_ATOMIC_CS_STALL   (1<<17)
+#define   MI_ATOMIC_RETURN_DATA_CTL(1<<16)
+#define MI_ATOMIC_OP_MASK(op)  ((op) << 8)
+#define MI_ATOMIC_AND  MI_ATOMIC_OP_MASK(0x01)
+#define MI_ATOMIC_OR   MI_ATOMIC_OP_MASK(0x02)
+#define MI_ATOMIC_XOR  MI_ATOMIC_OP_MASK(0x03)
+#define MI_ATOMIC_MOVE MI_ATOMIC_OP_MASK(0x04)
+#define MI_ATOMIC_INC  MI_ATOMIC_OP_MASK(0x05)
+#define MI_ATOMIC_DEC  MI_ATOMIC_OP_MASK(0x06)
+#define MI_ATOMIC_ADD  MI_ATOMIC_OP_MASK(0x07)
+#define MI_ATOMIC_SUB  MI_ATOMIC_OP_MASK(0x08)
+#define MI_ATOMIC_RSUB MI_ATOMIC_OP_MASK(0x09)
+#define MI_ATOMIC_IMAX MI_ATOMIC_OP_MASK(0x0A)
+#define MI_ATOMIC_IMIN MI_ATOMIC_OP_MASK(0x0B)
+#define MI_ATOMIC_UMAX MI_ATOMIC_OP_MASK(0x0C)
+#define MI_ATOMIC_UMIN MI_ATOMIC_OP_MASK(0x0D)
+
   #define MI_BATCH_BUFFER  MI_INSTR(0x30, 1)
   #define   MI_BATCH_NON_SECURE(1)
   /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -410,6 +430,7 @@
   #define   DISPLAY_PLANE_A   (0<<20)
   #define   DISPLAY_PLANE_B   (1<<20)
   #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2))
+#define   PIPE_CONTROL_FLUSH_RO_CACHES (1<<27)

I think the consensus is to rename this to PIPE_CONTROL_FLUSH_L3, isn't it?


Yes, it will be renamed to PIPE_CONTROL_FLUSH_L3 in v2.


   #define   PIPE_CONTROL_GLOBAL_GTT_IVB(1<<24) /* gen7+ */
   #define   PIPE_CONTROL_MMIO_WRITE(1<<23)
   #define   PIPE_CONTROL_STORE_DATA_INDEX  (1<<21)
@@ -426,6 +447,7 @@
   #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE(1<<9)
   #define   PIPE_CONTROL_NOTIFY(1<<8)
   #define   PIPE_CONTROL_FLUSH_ENABLE  (1<<7) /* gen7+ */
+#define   PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
   #define   PIPE_CONTROL_VF_CACHE_INVALIDATE   (1<<4)
   #define   PIPE_CONTROL_CONST_CACHE_INVALIDATE(1<<3)
   #define   PIPE_CONTROL_STATE_CACHE_INVALIDATE(1<<2)
@@ -449,8 +471,10 @@
   #define MI_CLFLUSH  MI_INSTR(0x27, 0)
   #define MI_REPORT_PERF_COUNTMI_INSTR(0x28, 0)
   #define   MI_REPORT_PERF_COUNT_GG

Re: [Intel-gfx] [PATCH 2/2] drm/i915/gen8: Apply Per-context workarounds using W/A batch buffers

2015-03-02 Thread Michel Thierry



On 25/02/15 17:54, Arun Siluvery wrote:

Some of the workarounds are to be applied during context save but before
restore and some at the end of context save/restore but before executing
the instructions in the ring. Workaround batch buffers are created for
this purpose as they cannot be applied using normal means. HW executes
them at specific stages during context save/restore.

In this method we initialize batch buffer with w/a commands and its address
is supplied using context offset pointers when a context is initialized.

This patch introduces indirect and per-context batch buffers using which
following workarounds are applied. These are required to fix issues
observed with preemption related workloads.

In Indirect context w/a batch buffer,
+WaDisableCtxRestoreArbitration
+WaFlushCoherentL3CacheLinesAtContextSwitch
+WaClearSlmSpaceAtContextSwitch

In Per context w/a batch buffer,
+WaDisableCtxRestoreArbitration
+WaRsRestoreWithPerCtxtBb

v2: Use GTT address type for all privileged instructions, update as
per dynamic pinning changes, minor simplifications, rename variables
as follows to keep lines under 80 chars and rebase.
s/indirect_ctx_wa_ringbuf/indirect_ctx_wa_bb
s/per_ctx_wa_ringbuf/per_ctx_wa_bb

v3: Modify WA BB initialization to Gen specific.

Change-Id: I0cedb536b7f6d9f10ba9e81ba625848e7bab603c
Signed-off-by: Rafael Barbalho 
Signed-off-by: Arun Siluvery 
---
  drivers/gpu/drm/i915/i915_drv.h |   3 +
  drivers/gpu/drm/i915/i915_reg.h |  30 +++-
  drivers/gpu/drm/i915/intel_lrc.c| 302 +++-
  drivers/gpu/drm/i915/intel_ringbuffer.h |   3 +
  4 files changed, 297 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d42040f..86cdb52 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -774,6 +774,9 @@ struct intel_context {
  
  	/* Execlists */

bool rcs_initialized;
+   struct intel_ringbuffer *indirect_ctx_wa_bb;
+   struct intel_ringbuffer *per_ctx_wa_bb;
+
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 55143cb..eb41d7f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -347,6 +347,26 @@
  #define   MI_INVALIDATE_BSD   (1<<7)
  #define   MI_FLUSH_DW_USE_GTT (1<<2)
  #define   MI_FLUSH_DW_USE_PPGTT   (0<<2)
+#define MI_ATOMIC(len) MI_INSTR(0x2F, (len-2))
+#define   MI_ATOMIC_MEMORY_TYPE_GGTT   (1<<22)
+#define   MI_ATOMIC_INLINE_DATA(1<<18)
+#define   MI_ATOMIC_CS_STALL   (1<<17)
+#define   MI_ATOMIC_RETURN_DATA_CTL(1<<16)
+#define MI_ATOMIC_OP_MASK(op)  ((op) << 8)
+#define MI_ATOMIC_AND  MI_ATOMIC_OP_MASK(0x01)
+#define MI_ATOMIC_OR   MI_ATOMIC_OP_MASK(0x02)
+#define MI_ATOMIC_XOR  MI_ATOMIC_OP_MASK(0x03)
+#define MI_ATOMIC_MOVE MI_ATOMIC_OP_MASK(0x04)
+#define MI_ATOMIC_INC  MI_ATOMIC_OP_MASK(0x05)
+#define MI_ATOMIC_DEC  MI_ATOMIC_OP_MASK(0x06)
+#define MI_ATOMIC_ADD  MI_ATOMIC_OP_MASK(0x07)
+#define MI_ATOMIC_SUB  MI_ATOMIC_OP_MASK(0x08)
+#define MI_ATOMIC_RSUB MI_ATOMIC_OP_MASK(0x09)
+#define MI_ATOMIC_IMAX MI_ATOMIC_OP_MASK(0x0A)
+#define MI_ATOMIC_IMIN MI_ATOMIC_OP_MASK(0x0B)
+#define MI_ATOMIC_UMAX MI_ATOMIC_OP_MASK(0x0C)
+#define MI_ATOMIC_UMIN MI_ATOMIC_OP_MASK(0x0D)
+
  #define MI_BATCH_BUFFER   MI_INSTR(0x30, 1)
  #define   MI_BATCH_NON_SECURE (1)
  /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -410,6 +430,7 @@
  #define   DISPLAY_PLANE_A   (0<<20)
  #define   DISPLAY_PLANE_B   (1<<20)
  #define GFX_OP_PIPE_CONTROL(len)  ((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2))
+#define   PIPE_CONTROL_FLUSH_RO_CACHES (1<<27)

I think the consensus is to rename this to PIPE_CONTROL_FLUSH_L3, isn't it?

  #define   PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */
  #define   PIPE_CONTROL_MMIO_WRITE (1<<23)
  #define   PIPE_CONTROL_STORE_DATA_INDEX   (1<<21)
@@ -426,6 +447,7 @@
  #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9)
  #define   PIPE_CONTROL_NOTIFY (1<<8)
  #define   PIPE_CONTROL_FLUSH_ENABLE   (1<<7) /* gen7+ */
+#define   PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
  #define   PIPE_CONTROL_VF_CACHE_INVALIDATE(1<<4)
  #define   PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3)
  #define   PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2)
@@ -449,8 +471,10 @@
  #define MI_CLFLUSH  MI_INSTR(0x27, 0)
  #define MI_REPORT_PERF_COUNTMI_INSTR(0x28, 0)
  #define   MI_REPORT_PERF_COUNT_GGTT (1<<0)
-#define MI_LOAD_REGISTER_MEMMI_INSTR(0x29, 0)
-#define MI_LOAD_REGISTER_REGMI_INSTR(0x2A, 0)
+#define MI_LOAD_REGISTER_MEMMI_INSTR(0x29, 2)

Should thi

Re: [Intel-gfx] [PATCH 2/2] drm/i915/gen8: Apply Per-context workarounds using W/A batch buffers

2015-02-26 Thread shuang . he
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang...@intel.com)
Task id: 5828
-Summary-
Platform  Delta  drm-intel-nightly  Series Applied
PNV -1  281/281  280/281
ILK  308/308  308/308
SNB  326/326  326/326
IVB  380/380  380/380
BYT  294/294  294/294
HSW  387/421  387/421
BDW -1  316/316  315/316
-Detailed-
Platform  Test  drm-intel-nightly  Series Applied
*PNV  igt_gen3_render_mixed_blits  PASS(7)  CRASH(1)PASS(1)
*BDW  igt_gem_gtt_hog  PASS(12)  DMESG_WARN(1)PASS(1)
Note: You need to pay more attention to lines starting with '*'
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915/gen8: Apply Per-context workarounds using W/A batch buffers

2015-02-25 Thread Arun Siluvery
Some of the workarounds are to be applied during context save but before
restore and some at the end of context save/restore but before executing
the instructions in the ring. Workaround batch buffers are created for
this purpose as they cannot be applied using normal means. HW executes
them at specific stages during context save/restore.

In this method we initialize batch buffer with w/a commands and its address
is supplied using context offset pointers when a context is initialized.

This patch introduces indirect and per-context batch buffers, through which
the following workarounds are applied. These are required to fix issues
observed with preemption-related workloads.

In Indirect context w/a batch buffer,
+WaDisableCtxRestoreArbitration
+WaFlushCoherentL3CacheLinesAtContextSwitch
+WaClearSlmSpaceAtContextSwitch

In Per context w/a batch buffer,
+WaDisableCtxRestoreArbitration
+WaRsRestoreWithPerCtxtBb

v2: Use GTT address type for all privileged instructions, update as
per dynamic pinning changes, minor simplifications, rename variables
as follows to keep lines under 80 chars and rebase.
s/indirect_ctx_wa_ringbuf/indirect_ctx_wa_bb
s/per_ctx_wa_ringbuf/per_ctx_wa_bb

v3: Modify WA BB initialization to Gen specific.

Change-Id: I0cedb536b7f6d9f10ba9e81ba625848e7bab603c
Signed-off-by: Rafael Barbalho 
Signed-off-by: Arun Siluvery 
---
 drivers/gpu/drm/i915/i915_drv.h |   3 +
 drivers/gpu/drm/i915/i915_reg.h |  30 +++-
 drivers/gpu/drm/i915/intel_lrc.c| 302 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |   3 +
 4 files changed, 297 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d42040f..86cdb52 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -774,6 +774,9 @@ struct intel_context {
 
/* Execlists */
bool rcs_initialized;
+   struct intel_ringbuffer *indirect_ctx_wa_bb;
+   struct intel_ringbuffer *per_ctx_wa_bb;
+
struct {
struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 55143cb..eb41d7f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -347,6 +347,26 @@
 #define   MI_INVALIDATE_BSD(1<<7)
 #define   MI_FLUSH_DW_USE_GTT  (1<<2)
 #define   MI_FLUSH_DW_USE_PPGTT(0<<2)
+#define MI_ATOMIC(len) MI_INSTR(0x2F, (len-2))
+#define   MI_ATOMIC_MEMORY_TYPE_GGTT   (1<<22)
+#define   MI_ATOMIC_INLINE_DATA(1<<18)
+#define   MI_ATOMIC_CS_STALL   (1<<17)
+#define   MI_ATOMIC_RETURN_DATA_CTL(1<<16)
+#define MI_ATOMIC_OP_MASK(op)  ((op) << 8)
+#define MI_ATOMIC_AND  MI_ATOMIC_OP_MASK(0x01)
+#define MI_ATOMIC_OR   MI_ATOMIC_OP_MASK(0x02)
+#define MI_ATOMIC_XOR  MI_ATOMIC_OP_MASK(0x03)
+#define MI_ATOMIC_MOVE MI_ATOMIC_OP_MASK(0x04)
+#define MI_ATOMIC_INC  MI_ATOMIC_OP_MASK(0x05)
+#define MI_ATOMIC_DEC  MI_ATOMIC_OP_MASK(0x06)
+#define MI_ATOMIC_ADD  MI_ATOMIC_OP_MASK(0x07)
+#define MI_ATOMIC_SUB  MI_ATOMIC_OP_MASK(0x08)
+#define MI_ATOMIC_RSUB MI_ATOMIC_OP_MASK(0x09)
+#define MI_ATOMIC_IMAX MI_ATOMIC_OP_MASK(0x0A)
+#define MI_ATOMIC_IMIN MI_ATOMIC_OP_MASK(0x0B)
+#define MI_ATOMIC_UMAX MI_ATOMIC_OP_MASK(0x0C)
+#define MI_ATOMIC_UMIN MI_ATOMIC_OP_MASK(0x0D)
+
 #define MI_BATCH_BUFFERMI_INSTR(0x30, 1)
 #define   MI_BATCH_NON_SECURE  (1)
 /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -410,6 +430,7 @@
 #define   DISPLAY_PLANE_A   (0<<20)
 #define   DISPLAY_PLANE_B   (1<<20)
 #define GFX_OP_PIPE_CONTROL(len)   ((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2))
+#define   PIPE_CONTROL_FLUSH_RO_CACHES (1<<27)
 #define   PIPE_CONTROL_GLOBAL_GTT_IVB  (1<<24) /* gen7+ */
 #define   PIPE_CONTROL_MMIO_WRITE  (1<<23)
 #define   PIPE_CONTROL_STORE_DATA_INDEX(1<<21)
@@ -426,6 +447,7 @@
 #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE  (1<<9)
 #define   PIPE_CONTROL_NOTIFY  (1<<8)
 #define   PIPE_CONTROL_FLUSH_ENABLE(1<<7) /* gen7+ */
+#define   PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
 #define   PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4)
 #define   PIPE_CONTROL_CONST_CACHE_INVALIDATE  (1<<3)
 #define   PIPE_CONTROL_STATE_CACHE_INVALIDATE  (1<<2)
@@ -449,8 +471,10 @@
 #define MI_CLFLUSH  MI_INSTR(0x27, 0)
 #define MI_REPORT_PERF_COUNTMI_INSTR(0x28, 0)
 #define   MI_REPORT_PERF_COUNT_GGTT (1<<0)
-#define MI_LOAD_REGISTER_MEMMI_INSTR(0x29, 0)
-#define MI_LOAD_REGISTER_REGMI_INSTR(0x2A, 0)
+#define MI_LOAD_REGISTER_MEMMI_INSTR(0x29, 2)
+#define MI_LRM_USE_GLOBAL_GTT (1<<22)
+#define MI_LRM_ASYNC_MODE_ENABLE (1<<21)
+#define MI_LOAD_REGISTER_REGMI_INSTR(0x2A, 1)
 #define