On Thu, Nov 01, 2018 at 08:04:20PM -0700, Kenneth Graunke wrote: > While this does add a bunch of boilerplate, it also protects us against > the hardware moving bits, or changing their meaning. For something as > finnicky as PIPE_CONTROL, the extra safety seems worth it. > > We turn PIPE_CONTROL_* into an bitfield of arbitrary flags, and then > pack them appropriately.
This is clearly better than before in my opinion. Few suggestions below but patches 1-3 are: Reviewed-by: Topi Pohjolainen <topi.pohjolai...@intel.com> > --- > src/mesa/drivers/dri/i965/Makefile.sources | 9 + > src/mesa/drivers/dri/i965/brw_context.h | 4 + > src/mesa/drivers/dri/i965/brw_pipe_control.c | 240 +++-------------- > src/mesa/drivers/dri/i965/brw_pipe_control.h | 58 +++-- > src/mesa/drivers/dri/i965/brw_state.h | 31 +++ > src/mesa/drivers/dri/i965/genX_pipe_control.c | 243 ++++++++++++++++++ > src/mesa/drivers/dri/i965/meson.build | 4 +- > 7 files changed, 359 insertions(+), 230 deletions(-) > create mode 100644 src/mesa/drivers/dri/i965/genX_pipe_control.c > > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources > b/src/mesa/drivers/dri/i965/Makefile.sources > index 63fa7b886f2..e4eb0339e09 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.sources > +++ b/src/mesa/drivers/dri/i965/Makefile.sources > @@ -127,46 +127,55 @@ intel_tiled_memcpy_dep_FILES = \ > i965_gen4_FILES = \ > genX_blorp_exec.c \ > genX_boilerplate.h \ > + genX_pipe_control.c \ > genX_state_upload.c > > i965_gen45_FILES = \ > genX_blorp_exec.c \ > genX_boilerplate.h \ > + genX_pipe_control.c \ > genX_state_upload.c > > i965_gen5_FILES = \ > genX_blorp_exec.c \ > genX_boilerplate.h \ > + genX_pipe_control.c \ > genX_state_upload.c > > i965_gen6_FILES = \ > genX_blorp_exec.c \ > genX_boilerplate.h \ > + genX_pipe_control.c \ > genX_state_upload.c > > i965_gen7_FILES = \ > genX_blorp_exec.c \ > genX_boilerplate.h \ > + genX_pipe_control.c \ > genX_state_upload.c > > i965_gen75_FILES = \ > genX_blorp_exec.c \ > genX_boilerplate.h \ > + genX_pipe_control.c \ > genX_state_upload.c > > i965_gen8_FILES = \ > genX_blorp_exec.c \ > genX_boilerplate.h \ > + genX_pipe_control.c \ > genX_state_upload.c > > i965_gen9_FILES = \ > genX_blorp_exec.c \ > genX_boilerplate.h \ > + genX_pipe_control.c \ > genX_state_upload.c > > i965_gen10_FILES = \ > genX_blorp_exec.c \ > genX_boilerplate.h \ > + genX_pipe_control.c \ > genX_state_upload.c > > i965_gen11_FILES = \ > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index 7fd15669eb9..fe75425854c 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -752,6 +752,10 @@ struct brw_context > struct brw_bo *bo, > uint32_t offset_in_bytes, > uint32_t report_id); > + > + void (*emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags, > + struct brw_bo *bo, uint32_t offset, > + uint64_t imm); > } vtbl; > > struct brw_bufmgr *bufmgr; > diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c > b/src/mesa/drivers/dri/i965/brw_pipe_control.c > index 4d76e5dc9b7..cf9cc35875f 100644 > --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c > +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c > @@ -23,200 +23,10 @@ > > #include "brw_context.h" > #include "brw_defines.h" > +#include "brw_state.h" > #include "intel_batchbuffer.h" > #include "intel_fbo.h" > > -/** > - * According to the latest documentation, any PIPE_CONTROL with the > - * "Command Streamer Stall" bit set must also have another bit set, > - * with five different options: > - * > - * - Render Target Cache Flush > - * - Depth Cache Flush > - * - Stall at Pixel Scoreboard > - * - Post-Sync Operation > - * - Depth Stall > - * - DC Flush Enable > - * > - * I chose "Stall at Pixel Scoreboard" since we've used it effectively > - * in the past, but the choice is fairly arbitrary. > - */ > -static void > -gen8_add_cs_stall_workaround_bits(uint32_t *flags) > -{ > - uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | > - PIPE_CONTROL_DEPTH_CACHE_FLUSH | > - PIPE_CONTROL_WRITE_IMMEDIATE | > - PIPE_CONTROL_WRITE_DEPTH_COUNT | > - PIPE_CONTROL_WRITE_TIMESTAMP | > - PIPE_CONTROL_STALL_AT_SCOREBOARD | > - PIPE_CONTROL_DEPTH_STALL | > - PIPE_CONTROL_DATA_CACHE_FLUSH; > - > - /* If we're doing a CS stall, and don't already have one of the > - * workaround bits set, add "Stall at Pixel Scoreboard." > - */ > - if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0) > - *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; > -} > - > -/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT: > - * > - * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with > - * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set." > - * > - * Note that the kernel does CS stalls between batches, so we only need > - * to count them within a batch. > - */ > -static uint32_t > -gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t > flags) > -{ > - const struct gen_device_info *devinfo = &brw->screen->devinfo; > - > - if (devinfo->gen == 7 && !devinfo->is_haswell) { > - if (flags & PIPE_CONTROL_CS_STALL) { > - /* If we're doing a CS stall, reset the counter and carry on. */ > - brw->pipe_controls_since_last_cs_stall = 0; > - return 0; > - } > - > - /* If this is the fourth pipe control without a CS stall, do one now. > */ > - if (++brw->pipe_controls_since_last_cs_stall == 4) { > - brw->pipe_controls_since_last_cs_stall = 0; > - return PIPE_CONTROL_CS_STALL; > - } > - } > - return 0; > -} > - > -/* #1130 from gen10 workarounds page in h/w specs: > - * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is > - * not enabled in same PIPE CONTROL and Enable Pixel score board stall if > - * Render target cache flush is enabled." > - * > - * Applicable to CNL B0 and C0 steppings only. > - */ > -static void > -gen10_add_rcpfe_workaround_bits(uint32_t *flags) > -{ > - if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) { > - *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD; > - } else if (*flags & > - (PIPE_CONTROL_WRITE_IMMEDIATE | > - PIPE_CONTROL_WRITE_DEPTH_COUNT | > - PIPE_CONTROL_WRITE_TIMESTAMP)) { > - *flags = *flags | PIPE_CONTROL_DEPTH_STALL; > - } > -} > - > -static void > -brw_emit_pipe_control(struct brw_context *brw, uint32_t flags, > - struct brw_bo *bo, uint32_t offset, uint64_t imm) > -{ > - const struct gen_device_info *devinfo = &brw->screen->devinfo; > - > - if (devinfo->gen >= 8) { > - if (devinfo->gen == 8) > - gen8_add_cs_stall_workaround_bits(&flags); > - > - if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) { > - if (devinfo->gen == 9) { > - /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit > description > - * lists several workarounds: > - * > - * "Project: SKL, KBL, BXT > - * > - * If the VF Cache Invalidation Enable is set to a 1 in a > - * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields > - * sets to 0, with the VF Cache Invalidation Enable set to 0 > - * needs to be sent prior to the PIPE_CONTROL with VF Cache > - * Invalidation Enable set to a 1." > - */ > - brw_emit_pipe_control_flush(brw, 0); > - } > - > - if (devinfo->gen >= 9) { > - /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue: > - * > - * "Project: BDW+ > - * > - * When VF Cache Invalidate is set “Post Sync Operation” must > - * be enabled to “Write Immediate Data” or “Write PS Depth > - * Count” or “Write Timestamp”." > - * > - * If there's a BO, we're already doing some kind of write. > - * If not, add a write to the workaround BO. > - * > - * XXX: This causes GPU hangs on Broadwell, so restrict it to > - * Gen9+ for now...see this bug for more information: > - * https://bugs.freedesktop.org/show_bug.cgi?id=103787 > - */ > - if (!bo) { > - flags |= PIPE_CONTROL_WRITE_IMMEDIATE; > - bo = brw->workaround_bo; > - } > - } > - } > - > - if (devinfo->gen == 10) > - gen10_add_rcpfe_workaround_bits(&flags); > - > - BEGIN_BATCH(6); > - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); > - OUT_BATCH(flags); > - if (bo) { > - OUT_RELOC64(bo, RELOC_WRITE, offset); > - } else { > - OUT_BATCH(0); > - OUT_BATCH(0); > - } > - OUT_BATCH(imm); > - OUT_BATCH(imm >> 32); > - ADVANCE_BATCH(); > - } else if (devinfo->gen >= 6) { > - if (devinfo->gen == 6 && > - (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { > - /* Hardware workaround: SNB B-Spec says: > - * > - * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush > - * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is > - * required. > - */ > - brw_emit_post_sync_nonzero_flush(brw); > - } > - > - flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags); > - > - /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24 > - * on later platforms. We always use PPGTT on Gen7+. > - */ > - unsigned gen6_gtt = devinfo->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE > : 0; > - > - BEGIN_BATCH(5); > - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); > - OUT_BATCH(flags); > - if (bo) { > - OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, gen6_gtt | offset); > - } else { > - OUT_BATCH(0); > - } > - OUT_BATCH(imm); > - OUT_BATCH(imm >> 32); > - ADVANCE_BATCH(); > - } else { > - BEGIN_BATCH(4); > - OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2)); > - if (bo) { > - OUT_RELOC(bo, RELOC_WRITE, PIPE_CONTROL_GLOBAL_GTT_WRITE | offset); > - } else { > - OUT_BATCH(0); > - } > - OUT_BATCH(imm); > - OUT_BATCH(imm >> 32); > - ADVANCE_BATCH(); > - } > -} > - > /** > * Emit a PIPE_CONTROL with various flushing flags. > * > @@ -246,7 +56,7 @@ brw_emit_pipe_control_flush(struct brw_context *brw, > uint32_t flags) > flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL); > } > > - brw_emit_pipe_control(brw, flags, NULL, 0, 0); > + brw->vtbl.emit_raw_pipe_control(brw, flags, NULL, 0, 0); > } > > /** > @@ -262,7 +72,7 @@ brw_emit_pipe_control_write(struct brw_context *brw, > uint32_t flags, > struct brw_bo *bo, uint32_t offset, > uint64_t imm) > { > - brw_emit_pipe_control(brw, flags, bo, offset, imm); > + brw->vtbl.emit_raw_pipe_control(brw, flags, bo, offset, imm); > } > > /** > @@ -357,14 +167,14 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw) > void > gen10_emit_isp_disable(struct brw_context *brw) > { > - brw_emit_pipe_control(brw, > - PIPE_CONTROL_STALL_AT_SCOREBOARD | > - PIPE_CONTROL_CS_STALL, > - NULL, 0, 0); > - brw_emit_pipe_control(brw, > - PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE | > - PIPE_CONTROL_CS_STALL, > - NULL, 0, 0); > + brw->vtbl.emit_raw_pipe_control(brw, > + PIPE_CONTROL_STALL_AT_SCOREBOARD | > + PIPE_CONTROL_CS_STALL, > + NULL, 0, 0); > + brw->vtbl.emit_raw_pipe_control(brw, > + > PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE | > + PIPE_CONTROL_CS_STALL, > + NULL, 0, 0); > > brw->vs.base.push_constants_dirty = true; > brw->tcs.base.push_constants_dirty = true; > @@ -561,6 +371,34 @@ int > brw_init_pipe_control(struct brw_context *brw, > const struct gen_device_info *devinfo) > { > + switch (devinfo->gen) { > + case 10: > + brw->vtbl.emit_raw_pipe_control = gen10_emit_raw_pipe_control; > + break; > + case 9: > + brw->vtbl.emit_raw_pipe_control = gen9_emit_raw_pipe_control; > + break; > + case 8: > + brw->vtbl.emit_raw_pipe_control = gen8_emit_raw_pipe_control; > + break; > + case 7: > + brw->vtbl.emit_raw_pipe_control = > + devinfo->is_haswell ? gen75_emit_raw_pipe_control > + : gen7_emit_raw_pipe_control; > + break; > + case 6: > + brw->vtbl.emit_raw_pipe_control = gen6_emit_raw_pipe_control; > + break; > + case 5: > + brw->vtbl.emit_raw_pipe_control = gen5_emit_raw_pipe_control; > + break; > + case 4: > + brw->vtbl.emit_raw_pipe_control = > + devinfo->is_g4x ? gen45_emit_raw_pipe_control > + : gen4_emit_raw_pipe_control; > + break; > + } > + > if (devinfo->gen < 6) > return 0; > > diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.h > b/src/mesa/drivers/dri/i965/brw_pipe_control.h > index 69b1c7c31e6..e213f43a4f7 100644 > --- a/src/mesa/drivers/dri/i965/brw_pipe_control.h > +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.h > @@ -32,34 +32,38 @@ struct brw_bo; > * > * PIPE_CONTROL operation, a combination MI_FLUSH and register write with > * additional flushing control. > + * > + * The bits here are not the actual hardware values. The actual values > + * shift around a bit per-generation, so we just have flags for each > + * potential operation, and use genxml to encode the actual packet. > */ > -#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24)) > -#define PIPE_CONTROL_LRI_WRITE_IMMEDIATE (1 << 23) /* Gen7+ */ > -#define PIPE_CONTROL_CS_STALL (1 << 20) > -#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET (1 << 19) > -#define PIPE_CONTROL_TLB_INVALIDATE (1 << 18) > -#define PIPE_CONTROL_SYNC_GFDT (1 << 17) > -#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1 << 16) > -#define PIPE_CONTROL_NO_WRITE (0 << 14) > -#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14) > -#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14) > -#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14) > -#define PIPE_CONTROL_DEPTH_STALL (1 << 13) > -#define PIPE_CONTROL_RENDER_TARGET_FLUSH (1 << 12) > -#define PIPE_CONTROL_INSTRUCTION_INVALIDATE (1 << 11) > -#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1 << 10) /* GM45+ only > */ > -#define PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE (1 << 9) > -#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8) > -#define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */ > -/* GT */ > -#define PIPE_CONTROL_DATA_CACHE_FLUSH (1 << 5) > -#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) > -#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3) > -#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2) > -#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) > -#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) > -#define PIPE_CONTROL_PPGTT_WRITE (0 << 2) > -#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) > +enum pipe_control_flags > +{ > + PIPE_CONTROL_FLUSH_LLC = (1 << 1), > + PIPE_CONTROL_LRI_POST_SYNC_OP = (1 << 2), > + PIPE_CONTROL_STORE_DATA_INDEX = (1 << 3), > + PIPE_CONTROL_CS_STALL = (1 << 4), > + PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET = (1 << 5), > + PIPE_CONTROL_SYNC_GFDT = (1 << 6), > + PIPE_CONTROL_TLB_INVALIDATE = (1 << 7), > + PIPE_CONTROL_MEDIA_STATE_CLEAR = (1 << 8), > + PIPE_CONTROL_WRITE_IMMEDIATE = (1 << 9), > + PIPE_CONTROL_WRITE_DEPTH_COUNT = (1 << 10), > + PIPE_CONTROL_WRITE_TIMESTAMP = (1 << 11), > + PIPE_CONTROL_DEPTH_STALL = (1 << 12), > + PIPE_CONTROL_RENDER_TARGET_FLUSH = (1 << 13), > + PIPE_CONTROL_INSTRUCTION_INVALIDATE = (1 << 14), > + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE = (1 << 15), > + PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16), > + PIPE_CONTROL_NOTIFY_ENABLE = (1 << 17), > + PIPE_CONTROL_FLUSH_ENABLE = (1 << 18), > + PIPE_CONTROL_DATA_CACHE_FLUSH = (1 << 19), > + PIPE_CONTROL_VF_CACHE_INVALIDATE = (1 << 20), > + PIPE_CONTROL_CONST_CACHE_INVALIDATE = (1 << 21), > + PIPE_CONTROL_STATE_CACHE_INVALIDATE = (1 << 22), > + PIPE_CONTROL_STALL_AT_SCOREBOARD = (1 << 23), > + PIPE_CONTROL_DEPTH_CACHE_FLUSH = (1 << 24), > +}; > > #define PIPE_CONTROL_CACHE_FLUSH_BITS \ > (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \ > diff --git a/src/mesa/drivers/dri/i965/brw_state.h > b/src/mesa/drivers/dri/i965/brw_state.h > index f6acf81b899..b62890729fa 100644 > --- a/src/mesa/drivers/dri/i965/brw_state.h > +++ b/src/mesa/drivers/dri/i965/brw_state.h > @@ -95,6 +95,37 @@ extern const struct brw_tracked_state gen7_urb; > extern const struct brw_tracked_state gen8_pma_fix; > extern const struct brw_tracked_state brw_cs_work_groups_surface; > > +void gen4_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, > + struct brw_bo *bo, uint32_t offset, > + uint64_t imm); > +void gen45_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, > + struct brw_bo *bo, uint32_t offset, > + uint64_t imm); > +void gen5_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, > + struct brw_bo *bo, uint32_t offset, > + uint64_t imm); > +void gen6_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, > + struct brw_bo *bo, uint32_t offset, > + uint64_t imm); > +void gen7_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, > + struct brw_bo *bo, uint32_t offset, > + uint64_t imm); > +void gen75_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, > + struct brw_bo *bo, uint32_t offset, > + uint64_t imm); > +void gen8_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, > + struct brw_bo *bo, uint32_t offset, > + uint64_t imm); > +void gen9_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, > + struct brw_bo *bo, uint32_t offset, > + uint64_t imm); > +void gen10_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, > + struct brw_bo *bo, uint32_t offset, > + uint64_t imm); > +void gen11_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, > + struct brw_bo *bo, uint32_t offset, > + uint64_t imm); > + > static inline bool > brw_state_dirty(const struct brw_context *brw, > GLuint mesa_flags, uint64_t brw_flags) > diff --git a/src/mesa/drivers/dri/i965/genX_pipe_control.c > b/src/mesa/drivers/dri/i965/genX_pipe_control.c > new file mode 100644 > index 00000000000..8eb37444253 > --- /dev/null > +++ b/src/mesa/drivers/dri/i965/genX_pipe_control.c > @@ -0,0 +1,243 @@ > +/* > + * Copyright © 2017 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include "genX_boilerplate.h" > +#include "brw_defines.h" > +#include "brw_state.h" > + > +/** > + * According to the latest documentation, any PIPE_CONTROL with the > + * "Command Streamer Stall" bit set must also have another bit set, > + * with five different options: > + * > + * - Render Target Cache Flush > + * - Depth Cache Flush > + * - Stall at Pixel Scoreboard > + * - Post-Sync Operation > + * - Depth Stall > + * - DC Flush Enable > + * > + * I chose "Stall at Pixel Scoreboard" since we've used it effectively > + * in the past, but the choice is fairly arbitrary. > + */ > +static void > +gen8_add_cs_stall_workaround_bits(uint32_t *flags) > +{ > + uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | > + PIPE_CONTROL_DEPTH_CACHE_FLUSH | > + PIPE_CONTROL_WRITE_IMMEDIATE | > + PIPE_CONTROL_WRITE_DEPTH_COUNT | > + PIPE_CONTROL_WRITE_TIMESTAMP | > + PIPE_CONTROL_STALL_AT_SCOREBOARD | > + PIPE_CONTROL_DEPTH_STALL | > + PIPE_CONTROL_DATA_CACHE_FLUSH; > + > + /* If we're doing a CS stall, and don't already have one of the > + * workaround bits set, add "Stall at Pixel Scoreboard." > + */ > + if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0) > + *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; > +} > + > +/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT: > + * > + * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with > + * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set." > + * > + * Note that the kernel does CS stalls between batches, so we only need > + * to count them within a batch. > + */ > +static uint32_t > +gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t > flags) > +{ > + if (GEN_GEN == 7 && !GEN_IS_HASWELL) { > + if (flags & PIPE_CONTROL_CS_STALL) { > + /* If we're doing a CS stall, reset the counter and carry on. */ > + brw->pipe_controls_since_last_cs_stall = 0; > + return 0; > + } > + > + /* If this is the fourth pipe control without a CS stall, do one now. > */ > + if (++brw->pipe_controls_since_last_cs_stall == 4) { > + brw->pipe_controls_since_last_cs_stall = 0; > + return PIPE_CONTROL_CS_STALL; > + } > + } > + return 0; > +} > + > +/* #1130 from gen10 workarounds page in h/w specs: > + * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is > + * not enabled in same PIPE CONTROL and Enable Pixel score board stall if > + * Render target cache flush is enabled." > + * > + * Applicable to CNL B0 and C0 steppings only. > + */ > +static void > +gen10_add_rcpfe_workaround_bits(uint32_t *flags) > +{ > + if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) { > + *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD; > + } else if (*flags & > + (PIPE_CONTROL_WRITE_IMMEDIATE | > + PIPE_CONTROL_WRITE_DEPTH_COUNT | > + PIPE_CONTROL_WRITE_TIMESTAMP)) { > + *flags = *flags | PIPE_CONTROL_DEPTH_STALL; > + } > +} > + > +static unsigned > +flags_to_post_sync_op(uint32_t flags) > +{ > + flags &= PIPE_CONTROL_WRITE_IMMEDIATE | > + PIPE_CONTROL_WRITE_DEPTH_COUNT | > + PIPE_CONTROL_WRITE_TIMESTAMP; > + > + assert(util_bitcount(flags) <= 1); > + > + if (flags & PIPE_CONTROL_WRITE_IMMEDIATE) > + return WriteImmediateData; > + > + if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) > + return WritePSDepthCount; > + > + if (flags & PIPE_CONTROL_WRITE_TIMESTAMP) > + return WriteTimestamp; > + > + return 0; > +} > + > +void > +genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags, > + struct brw_bo *bo, uint32_t offset, uint64_t imm) > +{ > + if (GEN_GEN >= 8) { > + if (GEN_GEN == 8) > + gen8_add_cs_stall_workaround_bits(&flags); > + > + if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) { > + if (GEN_GEN == 9) { > + /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit > description > + * lists several workarounds: > + * > + * "Project: SKL, KBL, BXT > + * > + * If the VF Cache Invalidation Enable is set to a 1 in a > + * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields > + * sets to 0, with the VF Cache Invalidation Enable set to 0 > + * needs to be sent prior to the PIPE_CONTROL with VF Cache > + * Invalidation Enable set to a 1." > + */ > + brw_emit_pipe_control_flush(brw, 0); > + } > + > + if (GEN_GEN >= 9) { > + /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue: > + * > + * "Project: BDW+ > + * > + * When VF Cache Invalidate is set “Post Sync Operation” must > + * be enabled to “Write Immediate Data” or “Write PS Depth > + * Count” or “Write Timestamp”." > + * > + * If there's a BO, we're already doing some kind of write. > + * If not, add a write to the workaround BO. > + * > + * XXX: This causes GPU hangs on Broadwell, so restrict it to > + * Gen9+ for now...see this bug for more information: > + * https://bugs.freedesktop.org/show_bug.cgi?id=103787 > + */ > + if (!bo) { > + flags |= PIPE_CONTROL_WRITE_IMMEDIATE; > + bo = brw->workaround_bo; > + } > + } > + } > + > + if (GEN_GEN == 10) > + gen10_add_rcpfe_workaround_bits(&flags); > + } else if (GEN_GEN >= 6) { > + if (GEN_GEN == 6 && > + (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { > + /* Hardware workaround: SNB B-Spec says: > + * > + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush > + * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is > + * required. > + */ > + brw_emit_post_sync_nonzero_flush(brw); > + } > + > + flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags); > + } > + > + brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) { > + #if GEN_GEN >= 9 > + pc.FlushLLC = 0; > + #endif > + #if GEN_GEN >= 7 > + pc.LRIPostSyncOperation = NoLRIOperation; > + pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE; > + pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH; > + #endif > + #if GEN_GEN >= 6 > + pc.StoreDataIndex = 0; > + pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL; > + pc.GlobalSnapshotCountReset = > + flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET; > + pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE; While comparing this against src/intel/genxml/gen*.xml I noticed that gen6 also has: <field name="Synchronize GFDT Surface" start="49" end="49" type="bool"/> Should we add it for consistency? > + pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR; > + pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD; gen5 also has: <field name="Stall At Pixel Scoreboard" start="33" end="33" type="bool"/> We didn't have this before and therefore this patch is fine. I just thought I write it down since I came across that. > + pc.RenderTargetCacheFlushEnable = > + flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; > + pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH; > + pc.StateCacheInvalidationEnable = > + flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE; > + pc.VFCacheInvalidationEnable = flags & > PIPE_CONTROL_VF_CACHE_INVALIDATE; > + pc.ConstantCacheInvalidationEnable = > + flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE; > + #else > + pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; > + #endif > + pc.PostSyncOperation = flags_to_post_sync_op(flags); > + pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL; > + pc.InstructionCacheInvalidateEnable = > + flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE; > + pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE; > + #if GEN_GEN >= 5 || GEN_IS_G4X > + pc.IndirectStatePointersDisable = > + flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE; > + #endif > + #if GEN_GEN >= 6 > + pc.TextureCacheInvalidationEnable = > + flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; > + #elif GEN_GEN == 5 || GEN_IS_G4X > + pc.TextureCacheFlushEnable = > + flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; gen5 also has: <field name="Depth Cache Flush Inhibit" start="32" end="32" type="uint"> > + #endif > + pc.Address = ggtt_bo(bo, offset); > + if (GEN_GEN < 7 && bo) > + pc.DestinationAddressType = DAT_GGTT; > + pc.ImmediateData = imm; > + } > +} > diff --git a/src/mesa/drivers/dri/i965/meson.build > b/src/mesa/drivers/dri/i965/meson.build > index 02f5f3073f7..e7c890785e2 100644 > --- a/src/mesa/drivers/dri/i965/meson.build > +++ b/src/mesa/drivers/dri/i965/meson.build > @@ -147,8 +147,8 @@ i965_gen_libs = [] > foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110'] > i965_gen_libs += static_library( > 'i965_gen@0@'.format(v), > - ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_state_upload.c', > - gen_xml_pack], > + ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_pipe_control.c', > + 'genX_state_upload.c', gen_xml_pack], > include_directories : [inc_common, inc_intel, inc_dri_common], > c_args : [ > c_vis_args, no_override_init_args, c_sse2_args, > -- > 2.19.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev