Re: [Intel-gfx] [PATCH 2/2] drm/i915/gen8: Apply Per-context workarounds using W/A batch buffers
On 02/03/2015 10:10, Michel Thierry wrote: On 25/02/15 17:54, Arun Siluvery wrote: Some of the workarounds are to be applied during context save but before restore and some at the end of context save/restore but before executing the instructions in the ring. Workaround batch buffers are created for this purpose as they cannot be applied using normal means. HW executes them at specific stages during context save/restore. In this method we initialize batch buffer with w/a commands and its address is supplied using context offset pointers when a context is initialized. This patch introduces indirect and per-context batch buffers using which following workarounds are applied. These are required to fix issues observed with preemption related workloads. In Indirect context w/a batch buffer, +WaDisableCtxRestoreArbitration +WaFlushCoherentL3CacheLinesAtContextSwitch +WaClearSlmSpaceAtContextSwitch In Per context w/a batch buffer, +WaDisableCtxRestoreArbitration +WaRsRestoreWithPerCtxtBb v2: Use GTT address type for all privileged instructions, update as per dynamic pinning changes, minor simplifications, rename variables as follows to keep lines under 80 chars and rebase. s/indirect_ctx_wa_ringbuf/indirect_ctx_wa_bb s/per_ctx_wa_ringbuf/per_ctx_wa_bb v3: Modify WA BB initialization to Gen specific. 
Change-Id: I0cedb536b7f6d9f10ba9e81ba625848e7bab603c Signed-off-by: Rafael Barbalho Signed-off-by: Arun Siluvery --- drivers/gpu/drm/i915/i915_drv.h | 3 + drivers/gpu/drm/i915/i915_reg.h | 30 +++- drivers/gpu/drm/i915/intel_lrc.c| 302 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 + 4 files changed, 297 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d42040f..86cdb52 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -774,6 +774,9 @@ struct intel_context { /* Execlists */ bool rcs_initialized; + struct intel_ringbuffer *indirect_ctx_wa_bb; + struct intel_ringbuffer *per_ctx_wa_bb; + struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 55143cb..eb41d7f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -347,6 +347,26 @@ #define MI_INVALIDATE_BSD (1<<7) #define MI_FLUSH_DW_USE_GTT(1<<2) #define MI_FLUSH_DW_USE_PPGTT (0<<2) +#define MI_ATOMIC(len) MI_INSTR(0x2F, (len-2)) +#define MI_ATOMIC_MEMORY_TYPE_GGTT (1<<22) +#define MI_ATOMIC_INLINE_DATA(1<<18) +#define MI_ATOMIC_CS_STALL (1<<17) +#define MI_ATOMIC_RETURN_DATA_CTL(1<<16) +#define MI_ATOMIC_OP_MASK(op) ((op) << 8) +#define MI_ATOMIC_AND MI_ATOMIC_OP_MASK(0x01) +#define MI_ATOMIC_OR MI_ATOMIC_OP_MASK(0x02) +#define MI_ATOMIC_XOR MI_ATOMIC_OP_MASK(0x03) +#define MI_ATOMIC_MOVE MI_ATOMIC_OP_MASK(0x04) +#define MI_ATOMIC_INC MI_ATOMIC_OP_MASK(0x05) +#define MI_ATOMIC_DEC MI_ATOMIC_OP_MASK(0x06) +#define MI_ATOMIC_ADD MI_ATOMIC_OP_MASK(0x07) +#define MI_ATOMIC_SUB MI_ATOMIC_OP_MASK(0x08) +#define MI_ATOMIC_RSUB MI_ATOMIC_OP_MASK(0x09) +#define MI_ATOMIC_IMAX MI_ATOMIC_OP_MASK(0x0A) +#define MI_ATOMIC_IMIN MI_ATOMIC_OP_MASK(0x0B) +#define MI_ATOMIC_UMAX MI_ATOMIC_OP_MASK(0x0C) +#define MI_ATOMIC_UMIN MI_ATOMIC_OP_MASK(0x0D) + #define MI_BATCH_BUFFER MI_INSTR(0x30, 1) 
#define MI_BATCH_NON_SECURE(1) /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ @@ -410,6 +430,7 @@ #define DISPLAY_PLANE_A (0<<20) #define DISPLAY_PLANE_B (1<<20) #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2)) +#define PIPE_CONTROL_FLUSH_RO_CACHES (1<<27) I think the consensus is to rename this to PIPE_CONTROL_FLUSH_L3, isn't it? Yes, it will be renamed to PIPE_CONTROL_FLUSH_L3 in v2. #define PIPE_CONTROL_GLOBAL_GTT_IVB(1<<24) /* gen7+ */ #define PIPE_CONTROL_MMIO_WRITE(1<<23) #define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) @@ -426,6 +447,7 @@ #define PIPE_CONTROL_INDIRECT_STATE_DISABLE(1<<9) #define PIPE_CONTROL_NOTIFY(1<<8) #define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ +#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) #define PIPE_CONTROL_CONST_CACHE_INVALIDATE(1<<3) #define PIPE_CONTROL_STATE_CACHE_INVALIDATE(1<<2) @@ -449,8 +471,10 @@ #define MI_CLFLUSH MI_INSTR(0x27, 0) #define MI_REPORT_PERF_COUNTMI_INSTR(0x28, 0) #define MI_REPORT_PERF_COUNT_GG
Re: [Intel-gfx] [PATCH 2/2] drm/i915/gen8: Apply Per-context workarounds using W/A batch buffers
On 25/02/15 17:54, Arun Siluvery wrote: Some of the workarounds are to be applied during context save but before restore and some at the end of context save/restore but before executing the instructions in the ring. Workaround batch buffers are created for this purpose as they cannot be applied using normal means. HW executes them at specific stages during context save/restore. In this method we initialize batch buffer with w/a commands and its address is supplied using context offset pointers when a context is initialized. This patch introduces indirect and per-context batch buffers using which following workarounds are applied. These are required to fix issues observed with preemption related workloads. In Indirect context w/a batch buffer, +WaDisableCtxRestoreArbitration +WaFlushCoherentL3CacheLinesAtContextSwitch +WaClearSlmSpaceAtContextSwitch In Per context w/a batch buffer, +WaDisableCtxRestoreArbitration +WaRsRestoreWithPerCtxtBb v2: Use GTT address type for all privileged instructions, update as per dynamic pinning changes, minor simplifications, rename variables as follows to keep lines under 80 chars and rebase. s/indirect_ctx_wa_ringbuf/indirect_ctx_wa_bb s/per_ctx_wa_ringbuf/per_ctx_wa_bb v3: Modify WA BB initialization to Gen specific. 
Change-Id: I0cedb536b7f6d9f10ba9e81ba625848e7bab603c Signed-off-by: Rafael Barbalho Signed-off-by: Arun Siluvery --- drivers/gpu/drm/i915/i915_drv.h | 3 + drivers/gpu/drm/i915/i915_reg.h | 30 +++- drivers/gpu/drm/i915/intel_lrc.c| 302 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 + 4 files changed, 297 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d42040f..86cdb52 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -774,6 +774,9 @@ struct intel_context { /* Execlists */ bool rcs_initialized; + struct intel_ringbuffer *indirect_ctx_wa_bb; + struct intel_ringbuffer *per_ctx_wa_bb; + struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 55143cb..eb41d7f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -347,6 +347,26 @@ #define MI_INVALIDATE_BSD (1<<7) #define MI_FLUSH_DW_USE_GTT (1<<2) #define MI_FLUSH_DW_USE_PPGTT (0<<2) +#define MI_ATOMIC(len) MI_INSTR(0x2F, (len-2)) +#define MI_ATOMIC_MEMORY_TYPE_GGTT (1<<22) +#define MI_ATOMIC_INLINE_DATA(1<<18) +#define MI_ATOMIC_CS_STALL (1<<17) +#define MI_ATOMIC_RETURN_DATA_CTL(1<<16) +#define MI_ATOMIC_OP_MASK(op) ((op) << 8) +#define MI_ATOMIC_AND MI_ATOMIC_OP_MASK(0x01) +#define MI_ATOMIC_OR MI_ATOMIC_OP_MASK(0x02) +#define MI_ATOMIC_XOR MI_ATOMIC_OP_MASK(0x03) +#define MI_ATOMIC_MOVE MI_ATOMIC_OP_MASK(0x04) +#define MI_ATOMIC_INC MI_ATOMIC_OP_MASK(0x05) +#define MI_ATOMIC_DEC MI_ATOMIC_OP_MASK(0x06) +#define MI_ATOMIC_ADD MI_ATOMIC_OP_MASK(0x07) +#define MI_ATOMIC_SUB MI_ATOMIC_OP_MASK(0x08) +#define MI_ATOMIC_RSUB MI_ATOMIC_OP_MASK(0x09) +#define MI_ATOMIC_IMAX MI_ATOMIC_OP_MASK(0x0A) +#define MI_ATOMIC_IMIN MI_ATOMIC_OP_MASK(0x0B) +#define MI_ATOMIC_UMAX MI_ATOMIC_OP_MASK(0x0C) +#define MI_ATOMIC_UMIN MI_ATOMIC_OP_MASK(0x0D) + #define MI_BATCH_BUFFER MI_INSTR(0x30, 1) 
#define MI_BATCH_NON_SECURE (1) /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ @@ -410,6 +430,7 @@ #define DISPLAY_PLANE_A (0<<20) #define DISPLAY_PLANE_B (1<<20) #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2)) +#define PIPE_CONTROL_FLUSH_RO_CACHES (1<<27) I think the consensus is to rename this to PIPE_CONTROL_FLUSH_L3, isn't it? #define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ #define PIPE_CONTROL_MMIO_WRITE (1<<23) #define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) @@ -426,6 +447,7 @@ #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) #define PIPE_CONTROL_NOTIFY (1<<8) #define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ +#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) #define PIPE_CONTROL_VF_CACHE_INVALIDATE(1<<4) #define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) #define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) @@ -449,8 +471,10 @@ #define MI_CLFLUSH MI_INSTR(0x27, 0) #define MI_REPORT_PERF_COUNTMI_INSTR(0x28, 0) #define MI_REPORT_PERF_COUNT_GGTT (1<<0) -#define MI_LOAD_REGISTER_MEMMI_INSTR(0x29, 0) -#define MI_LOAD_REGISTER_REGMI_INSTR(0x2A, 0) +#define MI_LOAD_REGISTER_MEMMI_INSTR(0x29, 2) Should thi
Re: [Intel-gfx] [PATCH 2/2] drm/i915/gen8: Apply Per-context workarounds using W/A batch buffers
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang...@intel.com) Task id: 5828 -Summary- Platform Delta drm-intel-nightly Series Applied PNV -1 281/281 280/281 ILK 308/308 308/308 SNB 326/326 326/326 IVB 380/380 380/380 BYT 294/294 294/294 HSW 387/421 387/421 BDW -1 316/316 315/316 -Detailed- Platform Test drm-intel-nightly Series Applied *PNV igt_gen3_render_mixed_blits PASS(7) CRASH(1) PASS(1) *BDW igt_gem_gtt_hog PASS(12) DMESG_WARN(1) PASS(1) Note: You need to pay more attention to lines starting with '*' ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915/gen8: Apply Per-context workarounds using W/A batch buffers
Some of the workarounds must be applied during context save but before restore, and some at the end of context save/restore but before executing the instructions in the ring. Workaround batch buffers are created for this purpose because these workarounds cannot be applied using normal means. HW executes them at specific stages during context save/restore. In this method we initialize a batch buffer with w/a commands, and its address is supplied using context offset pointers when a context is initialized. This patch introduces indirect and per-context batch buffers, which are used to apply the following workarounds. These are required to fix issues observed with preemption-related workloads. In the indirect context w/a batch buffer: +WaDisableCtxRestoreArbitration +WaFlushCoherentL3CacheLinesAtContextSwitch +WaClearSlmSpaceAtContextSwitch In the per-context w/a batch buffer: +WaDisableCtxRestoreArbitration +WaRsRestoreWithPerCtxtBb v2: Use GTT address type for all privileged instructions, update as per dynamic pinning changes, minor simplifications, rename variables as follows to keep lines under 80 chars, and rebase. s/indirect_ctx_wa_ringbuf/indirect_ctx_wa_bb s/per_ctx_wa_ringbuf/per_ctx_wa_bb v3: Make WA BB initialization Gen specific. 
Change-Id: I0cedb536b7f6d9f10ba9e81ba625848e7bab603c Signed-off-by: Rafael Barbalho Signed-off-by: Arun Siluvery --- drivers/gpu/drm/i915/i915_drv.h | 3 + drivers/gpu/drm/i915/i915_reg.h | 30 +++- drivers/gpu/drm/i915/intel_lrc.c| 302 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 + 4 files changed, 297 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d42040f..86cdb52 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -774,6 +774,9 @@ struct intel_context { /* Execlists */ bool rcs_initialized; + struct intel_ringbuffer *indirect_ctx_wa_bb; + struct intel_ringbuffer *per_ctx_wa_bb; + struct { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 55143cb..eb41d7f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -347,6 +347,26 @@ #define MI_INVALIDATE_BSD(1<<7) #define MI_FLUSH_DW_USE_GTT (1<<2) #define MI_FLUSH_DW_USE_PPGTT(0<<2) +#define MI_ATOMIC(len) MI_INSTR(0x2F, (len-2)) +#define MI_ATOMIC_MEMORY_TYPE_GGTT (1<<22) +#define MI_ATOMIC_INLINE_DATA(1<<18) +#define MI_ATOMIC_CS_STALL (1<<17) +#define MI_ATOMIC_RETURN_DATA_CTL(1<<16) +#define MI_ATOMIC_OP_MASK(op) ((op) << 8) +#define MI_ATOMIC_AND MI_ATOMIC_OP_MASK(0x01) +#define MI_ATOMIC_OR MI_ATOMIC_OP_MASK(0x02) +#define MI_ATOMIC_XOR MI_ATOMIC_OP_MASK(0x03) +#define MI_ATOMIC_MOVE MI_ATOMIC_OP_MASK(0x04) +#define MI_ATOMIC_INC MI_ATOMIC_OP_MASK(0x05) +#define MI_ATOMIC_DEC MI_ATOMIC_OP_MASK(0x06) +#define MI_ATOMIC_ADD MI_ATOMIC_OP_MASK(0x07) +#define MI_ATOMIC_SUB MI_ATOMIC_OP_MASK(0x08) +#define MI_ATOMIC_RSUB MI_ATOMIC_OP_MASK(0x09) +#define MI_ATOMIC_IMAX MI_ATOMIC_OP_MASK(0x0A) +#define MI_ATOMIC_IMIN MI_ATOMIC_OP_MASK(0x0B) +#define MI_ATOMIC_UMAX MI_ATOMIC_OP_MASK(0x0C) +#define MI_ATOMIC_UMIN MI_ATOMIC_OP_MASK(0x0D) + #define MI_BATCH_BUFFERMI_INSTR(0x30, 1) #define 
MI_BATCH_NON_SECURE (1) /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ @@ -410,6 +430,7 @@ #define DISPLAY_PLANE_A (0<<20) #define DISPLAY_PLANE_B (1<<20) #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2)) +#define PIPE_CONTROL_FLUSH_RO_CACHES (1<<27) #define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ #define PIPE_CONTROL_MMIO_WRITE (1<<23) #define PIPE_CONTROL_STORE_DATA_INDEX(1<<21) @@ -426,6 +447,7 @@ #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) #define PIPE_CONTROL_NOTIFY (1<<8) #define PIPE_CONTROL_FLUSH_ENABLE(1<<7) /* gen7+ */ +#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) #define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) #define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) @@ -449,8 +471,10 @@ #define MI_CLFLUSH MI_INSTR(0x27, 0) #define MI_REPORT_PERF_COUNTMI_INSTR(0x28, 0) #define MI_REPORT_PERF_COUNT_GGTT (1<<0) -#define MI_LOAD_REGISTER_MEMMI_INSTR(0x29, 0) -#define MI_LOAD_REGISTER_REGMI_INSTR(0x2A, 0) +#define MI_LOAD_REGISTER_MEMMI_INSTR(0x29, 2) +#define MI_LRM_USE_GLOBAL_GTT (1<<22) +#define MI_LRM_ASYNC_MODE_ENABLE (1<<21) +#define MI_LOAD_REGISTER_REGMI_INSTR(0x2A, 1) #define