On Wed, 2012-07-11 at 09:16 +0200, Gwenole Beauchesne wrote: > From: Daniel Vetter <daniel.vet...@ffwll.ch> > > Sandybridge requires an elaborate dance to flush caches without > hanging the gpu. See public docs Vol2Part1 1.7.4.1 PIPE_CONTROL > or the corresponding code in mesa/kernel. > > v2: Incorporate review from Chris Wilson. For paranoia keep all three > PIPE_CONTROL cmds in the same batchbuffer to avoid upsetting the gpu. > > Signed-off-by: Daniel Vetter <daniel.vet...@ffwll.ch> > [Gwenole: merged from xf86-video-intel] > Signed-off-by: Gwenole Beauchesne <gwenole.beauche...@intel.com> > --- > src/intel_batchbuffer.c | 54 +++++++++++++++++++++++++++++++++++----------- > src/intel_driver.h | 3 ++ > src/intel_memman.c | 6 +++++ > 3 files changed, 50 insertions(+), 13 deletions(-) > > diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c > index 0b52281..77a2c90 100644 > --- a/src/intel_batchbuffer.c > +++ b/src/intel_batchbuffer.c > @@ -154,6 +154,36 @@ intel_batchbuffer_data(struct intel_batchbuffer *batch, > batch->ptr += size; > } > > +static void > +intel_batchbuffer_emit_post_sync_nonzero_flush(struct intel_batchbuffer > *batch) > +{ > + struct intel_driver_data * const intel = batch->intel; > + > + /* Keep this entire sequence of 3 PIPE_CONTROL cmds in one batch to > + avoid upsetting the gpu. */ > + BEGIN_BATCH(batch, 3*4); > + OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2)); > + OUT_BATCH(batch, (CMD_PIPE_CONTROL_CS_STALL | > + CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD)); > + OUT_BATCH(batch, 0); /* address */ > + OUT_BATCH(batch, 0); /* write data */ > + > + OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2)); > + OUT_BATCH(batch, CMD_PIPE_CONTROL_WRITE_QWORD); > + OUT_RELOC(batch, intel->wa_scratch_bo, > + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); > + OUT_BATCH(batch, 0); /* write data */
The length (DW0[7:0]) is 3 for a QWORD write, not the 2 encoded by (4 - 2) above. The batch buffer used on Linux is a non-secure batch buffer, so the address given here must be a PPGTT address, which is valid only with the Linux 3.4+(?) kernel. In addition, is this operation needed? The doc only says that some store data commands are needed for TLB invalidation. > + > + /* now finally the _real flush */ > + OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2)); > + OUT_BATCH(batch, (CMD_PIPE_CONTROL_WC_FLUSH | > + CMD_PIPE_CONTROL_TC_FLUSH | > + CMD_PIPE_CONTROL_NOWRITE)); > + OUT_BATCH(batch, 0); /* write address */ > + OUT_BATCH(batch, 0); /* write data */ > + ADVANCE_BATCH(batch); > +} > + > void > intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) > { > @@ -162,24 +192,22 @@ intel_batchbuffer_emit_mi_flush(struct > intel_batchbuffer *batch) > if (IS_GEN6(intel->device_id) || > IS_GEN7(intel->device_id)) { > if (batch->flag == I915_EXEC_RENDER) { > - BEGIN_BATCH(batch, 4); > - OUT_BATCH(batch, CMD_PIPE_CONTROL | 0x2); > - > - if (IS_GEN6(intel->device_id)) > - OUT_BATCH(batch, > - CMD_PIPE_CONTROL_WC_FLUSH | > - CMD_PIPE_CONTROL_TC_FLUSH | > - CMD_PIPE_CONTROL_NOWRITE); > - else > + if (IS_GEN6(intel->device_id)) { > + /* HW workaround for Sandy Bridge */ > + intel_batchbuffer_emit_post_sync_nonzero_flush(batch); > + } > + else { > + BEGIN_BATCH(batch, 4); > + OUT_BATCH(batch, CMD_PIPE_CONTROL | 0x2); > OUT_BATCH(batch, > CMD_PIPE_CONTROL_WC_FLUSH | > CMD_PIPE_CONTROL_TC_FLUSH | > CMD_PIPE_CONTROL_DC_FLUSH | > CMD_PIPE_CONTROL_NOWRITE); > - > - OUT_BATCH(batch, 0); > - OUT_BATCH(batch, 0); > - ADVANCE_BATCH(batch); > + OUT_BATCH(batch, 0); > + OUT_BATCH(batch, 0); > + ADVANCE_BATCH(batch); > + } > } else { > if (batch->flag == I915_EXEC_BLT) { > BEGIN_BLT_BATCH(batch, 4); > diff --git a/src/intel_driver.h b/src/intel_driver.h > index b383218..ad95e41 100644 > --- a/src/intel_driver.h > +++ b/src/intel_driver.h > @@ -42,6 +42,7 @@ > #define BR13_8888 (0x3 << 24) > > #define CMD_PIPE_CONTROL (CMD_3D 
| (3 << 27) | (2 << > 24) | (0 << 16)) > +#define CMD_PIPE_CONTROL_CS_STALL (1 << 20) > #define CMD_PIPE_CONTROL_NOWRITE (0 << 14) > #define CMD_PIPE_CONTROL_WRITE_QWORD (1 << 14) > #define CMD_PIPE_CONTROL_WRITE_DEPTH (2 << 14) > @@ -54,6 +55,7 @@ > #define CMD_PIPE_CONTROL_DC_FLUSH (1 << 5) > #define CMD_PIPE_CONTROL_GLOBAL_GTT (1 << 2) > #define CMD_PIPE_CONTROL_LOCAL_PGTT (0 << 2) > +#define CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) > #define CMD_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) > > > @@ -116,6 +118,7 @@ struct intel_driver_data > int locked; > > dri_bufmgr *bufmgr; > + dri_bo *wa_scratch_bo; > > unsigned int has_exec2 : 1; /* Flag: has execbuffer2? */ > unsigned int has_bsd : 1; /* Flag: has bitstream decoder for H.264? */ > diff --git a/src/intel_memman.c b/src/intel_memman.c > index 7d56e96..cde267e 100644 > --- a/src/intel_memman.c > +++ b/src/intel_memman.c > @@ -38,12 +38,18 @@ intel_memman_init(struct intel_driver_data *intel) > assert(intel->bufmgr); > intel_bufmgr_gem_enable_reuse(intel->bufmgr); > > + if (IS_GEN6(intel->device_id)) { > + intel->wa_scratch_bo = > + drm_intel_bo_alloc(intel->bufmgr, "wa scratch", 4096, 4096); > + assert(intel->wa_scratch_bo); > + } > return True; > } > > Bool > intel_memman_terminate(struct intel_driver_data *intel) > { > + drm_intel_bo_unreference(intel->wa_scratch_bo); > drm_intel_bufmgr_destroy(intel->bufmgr); > return True; > } _______________________________________________ Libva mailing list Libva@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libva