On Thu, Nov 01, 2018 at 08:04:21PM -0700, Kenneth Graunke wrote: > This implements virtually all documented PIPE_CONTROL restrictions > in a centralized helper. You now simply ask for the operations you > want, and the pipe control "brain" will figure out exactly what pipe > controls to emit to make that happen without tanking your system. > > The hope is that this will fix some intermittent flushing issues as > well as GPU hangs. However, it also has a high risk of causing GPU > hangs and other regressions, as this is a particularly sensitive > area and poking the bear isn't always advisable.
First I checked I could find all the things in bspec. There was one that I couldn't, noted further down. Second I checked that all the rules earlier were implemented. Found one exception, noted further down as well. I didn't check if the rules still miss something in bspec. That would be another exercise... > --- > src/mesa/drivers/dri/i965/genX_pipe_control.c | 563 +++++++++++++----- > 1 file changed, 428 insertions(+), 135 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/genX_pipe_control.c > b/src/mesa/drivers/dri/i965/genX_pipe_control.c > index 8eb37444253..503e674661b 100644 > --- a/src/mesa/drivers/dri/i965/genX_pipe_control.c > +++ b/src/mesa/drivers/dri/i965/genX_pipe_control.c > @@ -25,172 +25,465 @@ > #include "brw_defines.h" > #include "brw_state.h" > > +static unsigned > +flags_to_post_sync_op(uint32_t flags) > +{ > + if (flags & PIPE_CONTROL_WRITE_IMMEDIATE) > + return WriteImmediateData; > + > + if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) > + return WritePSDepthCount; > + > + if (flags & PIPE_CONTROL_WRITE_TIMESTAMP) > + return WriteTimestamp; > + > + return 0; > +} > + > /** > - * According to the latest documentation, any PIPE_CONTROL with the > - * "Command Streamer Stall" bit set must also have another bit set, > - * with five different options: > - * > - * - Render Target Cache Flush > - * - Depth Cache Flush > - * - Stall at Pixel Scoreboard > - * - Post-Sync Operation > - * - Depth Stall > - * - DC Flush Enable > - * > - * I chose "Stall at Pixel Scoreboard" since we've used it effectively > - * in the past, but the choice is fairly arbitrary. > + * Do the given flags have a Post Sync or LRI Post Sync operation? > */ > -static void > -gen8_add_cs_stall_workaround_bits(uint32_t *flags) > +static enum pipe_control_flags > +get_post_sync_flags(enum pipe_control_flags flags) > { > - uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | > - PIPE_CONTROL_DEPTH_CACHE_FLUSH | > - PIPE_CONTROL_WRITE_IMMEDIATE | > - PIPE_CONTROL_WRITE_DEPTH_COUNT | > - PIPE_CONTROL_WRITE_TIMESTAMP | > - PIPE_CONTROL_STALL_AT_SCOREBOARD | > - PIPE_CONTROL_DEPTH_STALL | > - PIPE_CONTROL_DATA_CACHE_FLUSH; > - > - /* If we're doing a CS stall, and don't already have one of the > - * workaround bits set, add "Stall at Pixel Scoreboard." > + flags &= PIPE_CONTROL_WRITE_IMMEDIATE | > + PIPE_CONTROL_WRITE_DEPTH_COUNT | > + PIPE_CONTROL_WRITE_TIMESTAMP | > + PIPE_CONTROL_LRI_POST_SYNC_OP; > + > + /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with > + * "LRI Post Sync Operation". So more than one bit set would be illegal. > */ > - if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0) > - *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; > + assert(util_bitcount(flags) <= 1); > + > + return flags; > } > > -/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT: > +#define IS_COMPUTE_PIPELINE(brw) \ > + (GEN_GEN >= 7 && brw->last_pipeline == BRW_COMPUTE_PIPELINE) > + > +/* Closed interval - GEN_GEN \in [x, y] */ > +#define IS_GEN_BETWEEN(x, y) (GEN_GEN >= x && GEN_GEN <= y) > +#define IS_GENx10_BETWEEN(x, y) \ > + (GEN_VERSIONx10 >= x && GEN_VERSIONx10 <= y) > + > +/** > + * Emit a series of PIPE_CONTROL commands, taking into account any > + * workarounds necessary to actually accomplish the caller's request. > + * > + * Unless otherwise noted, spec quotations in this function come from: > * > - * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with > - * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set." > + * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming > + * Restrictions for PIPE_CONTROL. > * > - * Note that the kernel does CS stalls between batches, so we only need > - * to count them within a batch. > + * You should not use this function directly. Use the helpers in > + * brw_pipe_control.c instead, which may split the pipe control further. > */ > -static uint32_t > -gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t > flags) > +void > +genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags, > + struct brw_bo *bo, uint32_t offset, uint64_t imm) > { > - if (GEN_GEN == 7 && !GEN_IS_HASWELL) { > - if (flags & PIPE_CONTROL_CS_STALL) { > - /* If we're doing a CS stall, reset the counter and carry on. */ > - brw->pipe_controls_since_last_cs_stall = 0; > - return 0; > + UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo; > + enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags); > + enum pipe_control_flags non_lri_post_sync_flags = > + post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP; > + > + /* Recursive PIPE_CONTROL workarounds -------------------------------- > + * (http://knowyourmeme.com/memes/xzibit-yo-dawg) > + * > + * We do these first because we want to look at the original operation, > + * rather than any workarounds we set. > + */ > + if (GEN_GEN == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { > + /* Hardware workaround: SNB B-Spec says: > + * > + * "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush > + * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is > + * required." > + */ > + brw_emit_post_sync_nonzero_flush(brw); > + } > + > + if (GEN_GEN == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) { > + /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description > + * lists several workarounds: > + * > + * "Project: SKL, KBL, BXT > + * > + * If the VF Cache Invalidation Enable is set to a 1 in a > + * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields > + * sets to 0, with the VF Cache Invalidation Enable set to 0 > + * needs to be sent prior to the PIPE_CONTROL with VF Cache > + * Invalidation Enable set to a 1." > + */ > + genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0); > + } > + > + if (GEN_GEN == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) { > + /* Project: SKL / Argument: LRI Post Sync Operation [23] > + * > + * "PIPECONTROL command with “Command Streamer Stall Enable” must be > + * programmed prior to programming a PIPECONTROL command with "LRI > + * Post Sync Operation" in GPGPU mode of operation (i.e when > + * PIPELINE_SELECT command is set to GPGPU mode of operation)." > + * > + * The same text exists a few rows below for Post Sync Op. > + */ > + genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, bo, offset, > imm); Are bo, offset, imm needed here as well? > + } > + > + if (GEN_GEN == 10 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { > + /* Cannonlake: > + * "Before sending a PIPE_CONTROL command with bit 12 set, SW must > issue > + * another PIPE_CONTROL with Render Target Cache Flush Enable (bit 12) > + * = 0 and Pipe Control Flush Enable (bit 7) = 1" > + */ > + genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_FLUSH_ENABLE, > + bo, offset, imm); > + } > + > + /* "Flush Types" workarounds --------------------------------------------- > + * We do these now because they may add post-sync operations or CS stalls. > + */ > + > + if (IS_GEN_BETWEEN(8, 10) && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) { > + /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate > + * > + * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or > + * 'Write PS Depth Count' or 'Write Timestamp'." > + */ > + if (!bo) { > + flags |= PIPE_CONTROL_WRITE_IMMEDIATE; > + post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; > + non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; > + bo = brw->workaround_bo; > } > + } > > - /* If this is the fourth pipe control without a CS stall, do one now. > */ > - if (++brw->pipe_controls_since_last_cs_stall == 4) { > - brw->pipe_controls_since_last_cs_stall = 0; > - return PIPE_CONTROL_CS_STALL; > + /* #1130 from Gen10 workarounds page: > + * > + * "Enable Depth Stall on every Post Sync Op if Render target Cache > + * Flush is not enabled in same PIPE CONTROL and Enable Pixel score > + * board stall if Render target cache flush is enabled." > + * > + * Applicable to CNL B0 and C0 steppings only. > + * > + * The wording here is unclear, and this workaround doesn't look anything > + * like the internal bug report recommendations, but leave it be for > now... > + */ > + if (GEN_GEN == 10) { > + if (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) { > + flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; > + } else if (flags & non_lri_post_sync_flags) { > + flags |= PIPE_CONTROL_DEPTH_STALL; > } > } > - return 0; > -} > > -/* #1130 from gen10 workarounds page in h/w specs: > - * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is > - * not enabled in same PIPE CONTROL and Enable Pixel score board stall if > - * Render target cache flush is enabled." > - * > - * Applicable to CNL B0 and C0 steppings only. > - */ > -static void > -gen10_add_rcpfe_workaround_bits(uint32_t *flags) > -{ > - if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) { > - *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD; > - } else if (*flags & > - (PIPE_CONTROL_WRITE_IMMEDIATE | > - PIPE_CONTROL_WRITE_DEPTH_COUNT | > - PIPE_CONTROL_WRITE_TIMESTAMP)) { > - *flags = *flags | PIPE_CONTROL_DEPTH_STALL; > + if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) { > + /* Project: PRE-HSW / Argument: Depth Stall > + * > + * "The following bits must be clear: > + * - Render Target Cache Flush Enable ([12] of DW1) > + * - Depth Cache Flush Enable ([0] of DW1)" > + */ > + assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH | > + PIPE_CONTROL_DEPTH_CACHE_FLUSH))); > } > -} > > -static unsigned > -flags_to_post_sync_op(uint32_t flags) > -{ > - flags &= PIPE_CONTROL_WRITE_IMMEDIATE | > - PIPE_CONTROL_WRITE_DEPTH_COUNT | > - PIPE_CONTROL_WRITE_TIMESTAMP; > + if (GEN_GEN >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) { > + /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall > Enable): > + * > + * "This bit must be DISABLED for operations other than writing > + * PS_DEPTH_COUNT." > + * > + * This seems like nonsense. An Ivybridge workaround requires us to > + * emit a PIPE_CONTROL with a depth stall and write immediate post-sync > + * operation. Gen8+ requires us to emit depth stalls and depth cache > + * flushes together. So, it's hard to imagine this means anything > other > + * than "we originally intended this to be used for PS_DEPTH_COUNT". > + * > + * We ignore the supposed restriction and do nothing. > + */ > + } > > - assert(util_bitcount(flags) <= 1); > + if (GEN_VERSIONx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) { > + /* Project: PRE-HSW / Argument: Depth Cache Flush > + * > + * "Depth Stall must be clear ([13] of DW1)." > + */ > + assert(!(flags & PIPE_CONTROL_DEPTH_STALL)); > + } > > - if (flags & PIPE_CONTROL_WRITE_IMMEDIATE) > - return WriteImmediateData; > + if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH | > + PIPE_CONTROL_STALL_AT_SCOREBOARD)) { > + /* From the PIPE_CONTROL instruction table, bit 12 and bit 1: > + * > + * "This bit must be DISABLED for End-of-pipe (Read) fences, > + * PS_DEPTH_COUNT or TIMESTAMP queries." > + * > + * TODO: Implement end-of-pipe checking. > + */ > + assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT | > + PIPE_CONTROL_WRITE_TIMESTAMP))); > + } > > - if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) > - return WritePSDepthCount; > + if (GEN_GEN < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) { > + /* From the PIPE_CONTROL instruction table, bit 1: > + * > + * "This bit is ignored if Depth Stall Enable is set. > + * Further, the render cache is not flushed even if Write Cache > + * Flush Enable bit is set." > + * > + * We assert that the caller doesn't do this combination, to try and > + * prevent mistakes. It shouldn't hurt the GPU, though. > + * > + * We skip this check on Gen11+ as the "Stall and Pixel Scoreboard" > + * and "Render Target Flush" combo is explicitly required for BTI > + * update workarounds. > + */ > + assert(!(flags & (PIPE_CONTROL_DEPTH_STALL | > + PIPE_CONTROL_RENDER_TARGET_FLUSH))); > + } > > - if (flags & PIPE_CONTROL_WRITE_TIMESTAMP) > - return WriteTimestamp; > + /* PIPE_CONTROL page workarounds ------------------------------------- */ > > - return 0; > -} > + if (IS_GEN_BETWEEN(7, 8) && (flags & > PIPE_CONTROL_STATE_CACHE_INVALIDATE)) { > + /* From the PIPE_CONTROL page itself: > + * > + * "IVB, HSW, BDW > + * Restriction: Pipe_control with CS-stall bit set must be issued > + * before a pipe-control command that has the State Cache > + * Invalidate bit set." > + */ > + flags |= PIPE_CONTROL_CS_STALL; > + } > > -void > -genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags, > - struct brw_bo *bo, uint32_t offset, uint64_t imm) > -{ > - if (GEN_GEN >= 8) { > - if (GEN_GEN == 8) > - gen8_add_cs_stall_workaround_bits(&flags); > - > - if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) { > - if (GEN_GEN == 9) { > - /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit > description > - * lists several workarounds: > - * > - * "Project: SKL, KBL, BXT > - * > - * If the VF Cache Invalidation Enable is set to a 1 in a > - * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields > - * sets to 0, with the VF Cache Invalidation Enable set to 0 > - * needs to be sent prior to the PIPE_CONTROL with VF Cache > - * Invalidation Enable set to a 1." > - */ > - brw_emit_pipe_control_flush(brw, 0); > - } > - > - if (GEN_GEN >= 9) { > - /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue: > - * > - * "Project: BDW+ > - * > - * When VF Cache Invalidate is set “Post Sync Operation” must > - * be enabled to “Write Immediate Data” or “Write PS Depth > - * Count” or “Write Timestamp”." > - * > - * If there's a BO, we're already doing some kind of write. > - * If not, add a write to the workaround BO. > - * > - * XXX: This causes GPU hangs on Broadwell, so restrict it to > - * Gen9+ for now...see this bug for more information: > - * https://bugs.freedesktop.org/show_bug.cgi?id=103787 In "Flush Types" workarounds later on you apply this for gen8 as well. > - */ > - if (!bo) { > - flags |= PIPE_CONTROL_WRITE_IMMEDIATE; > - bo = brw->workaround_bo; > - } > - } > + if (GEN_IS_HASWELL) { > + /* From the PIPE_CONTROL page itself: > + * > + * "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation: > + * Prior to programming a PIPECONTROL command with any of the RO > + * cache invalidation bit set, program a PIPECONTROL flush command > + * with “CS stall” bit and “HDC Flush” bit set." > + * > + * TODO: Actually implement this. What's an HDC Flush? There is bit 9 HDC Flush but that is defined for ICL, for HSW I couldn't find anything either. > + */ > + } > + > + if (flags & PIPE_CONTROL_FLUSH_LLC) { > + /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC): > + * > + * "Project: ALL > + * SW must always program Post-Sync Operation to "Write Immediate > + * Data" when Flush LLC is set." > + * > + * For now, we just require the caller to do it. > + */ > + assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE); > + } > + > + /* "Post-Sync Operation" workarounds -------------------------------- */ > + > + /* Project: All / Argument: Global Snapshot Count Reset [19] > + * > + * "This bit must not be exercised on any product. > + * Requires stall bit ([20] of DW1) set." > + * > + * We don't use this, so we just assert that it isn't used. The > + * PIPE_CONTROL instruction page indicates that they intended this > + * as a debug feature and don't think it is useful in production, > + * but it may actually be usable, should we ever want to. > + */ > + assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0); > + > + if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR | > + PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) { > + /* Project: All / Arguments: > + * > + * - Generic Media State Clear [16] > + * - Indirect State Pointers Disable [16] > + * > + * "Requires stall bit ([20] of DW1) set." > + * > + * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media > + * State Clear) says: > + * > + * "PIPECONTROL command with “Command Streamer Stall Enable” must be > + * programmed prior to programming a PIPECONTROL command with > "Media > + * State Clear" set in GPGPU mode of operation" > + * > + * This is a subset of the earlier rule, so there's nothing to do. > + */ > + flags |= PIPE_CONTROL_CS_STALL; > + } > + > + if (flags & PIPE_CONTROL_STORE_DATA_INDEX) { > + /* Project: All / Argument: Store Data Index > + * > + * "Post-Sync Operation ([15:14] of DW1) must be set to something other > + * than '0'." > + * > + * For now, we just assert that the caller does this. We might want to > + * automatically add a write to the workaround BO... > + */ > + assert(non_lri_post_sync_flags != 0); > + } > + > + if (flags & PIPE_CONTROL_SYNC_GFDT) { > + /* Project: All / Argument: Sync GFDT > + * > + * "Post-Sync Operation ([15:14] of DW1) must be set to something other > + * than '0' or 0x2520[13] must be set." > + * > + * For now, we just assert that the caller does this. > + */ > + assert(non_lri_post_sync_flags != 0); > + } > + > + if (IS_GENx10_BETWEEN(60, 75) && (flags & PIPE_CONTROL_TLB_INVALIDATE)) { > + /* Project: SNB, IVB, HSW / Argument: TLB inv > + * > + * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1) > + * must be set to something other than '0'." > + * > + * For now, we just assert that the caller does this. > + */ > + assert(non_lri_post_sync_flags != 0); > + } > + > + if (GEN_GEN >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) { > + /* Project: IVB+ / Argument: TLB inv > + * > + * "Requires stall bit ([20] of DW1) set." > + * > + * Also, from the PIPE_CONTROL instruction table: > + * > + * "Project: SKL+ > + * Post Sync Operation or CS stall must be set to ensure a TLB > + * invalidation occurs. Otherwise no cycle will occur to the TLB > + * cache to invalidate." > + * > + * This is not a subset of the earlier rule, so there's nothing to do. > + */ > + flags |= PIPE_CONTROL_CS_STALL; > + } > + > + if (GEN_GEN == 9 && devinfo->gt == 4) { > + /* TODO: The big Skylake GT4 post sync op workaround */ > + } > + > + /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */ > + > + if (IS_COMPUTE_PIPELINE(brw)) { > + if (GEN_GEN >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) { > + /* Project: SKL+ / Argument: Tex Invalidate > + * "Requires stall bit ([20] of DW) set for all GPGPU Workloads." > + */ > + flags |= PIPE_CONTROL_CS_STALL; > } > > - if (GEN_GEN == 10) > - gen10_add_rcpfe_workaround_bits(&flags); > - } else if (GEN_GEN >= 6) { > - if (GEN_GEN == 6 && > - (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { > - /* Hardware workaround: SNB B-Spec says: > + if (GEN_GEN == 8 && (post_sync_flags || > + (flags & (PIPE_CONTROL_NOTIFY_ENABLE | > + PIPE_CONTROL_DEPTH_STALL | > + PIPE_CONTROL_RENDER_TARGET_FLUSH | > + PIPE_CONTROL_DEPTH_CACHE_FLUSH | > + PIPE_CONTROL_DATA_CACHE_FLUSH)))) { > + /* Project: BDW / Arguments: > * > - * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush > - * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is > - * required. > + * - LRI Post Sync Operation [23] > + * - Post Sync Op [15:14] > + * - Notify En [8] > + * - Depth Stall [13] > + * - Render Target Cache Flush [12] > + * - Depth Cache Flush [0] > + * - DC Flush Enable [5] > + * > + * "Requires stall bit ([20] of DW) set for all GPGPU and Media > + * Workloads." This I couldn't find. > */ > - brw_emit_post_sync_nonzero_flush(brw); > + flags |= PIPE_CONTROL_CS_STALL; > + > + /* Also, from the PIPE_CONTROL instruction table, bit 20: > + * > + * "Project: BDW > + * This bit must be always set when PIPE_CONTROL command is > + * programmed by GPGPU and MEDIA workloads, except for the cases > + * when only Read Only Cache Invalidation bits are set (State > + * Cache Invalidation Enable, Instruction cache Invalidation > + * Enable, Texture Cache Invalidation Enable, Constant Cache > + * Invalidation Enable). This is to WA FFDOP CG issue, this WA > + * need not implemented when FF_DOP_CG is disable via "Fixed > + * Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL > register." > + * > + * It sounds like we could avoid CS stalls in some cases, but we > + * don't currently bother. This list isn't exactly the list above, > + * either... > + */ > + } > + } > + > + /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT: > + * > + * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with > + * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set." > + * > + * Note that the kernel does CS stalls between batches, so we only need > + * to count them within a batch. We currently naively count every 4, and > + * don't skip the ones with only read-cache-invalidate bits set. This > + * may or may not be a problem... > + */ > + if (GEN_GEN == 7 && !GEN_IS_HASWELL) { > + if (flags & PIPE_CONTROL_CS_STALL) { > + /* If we're doing a CS stall, reset the counter and carry on. */ > + brw->pipe_controls_since_last_cs_stall = 0; > } > > - flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags); > + /* If this is the fourth pipe control without a CS stall, do one now. > */ > + if (++brw->pipe_controls_since_last_cs_stall == 4) { > + brw->pipe_controls_since_last_cs_stall = 0; > + flags |= PIPE_CONTROL_CS_STALL; > + } > } > > + /* "Stall" workarounds ---------------------------------------------- > + * These have to come after the earlier ones because we may have added > + * some additional CS stalls above. > + */ > + > + if (GEN_GEN < 9 && (flags & PIPE_CONTROL_CS_STALL)) { > + /* Project: PRE-SKL, VLV, CHV > + * > + * "[All Stepping][All SKUs]: > + * > + * One of the following must also be set: > + * > + * - Render Target Cache Flush Enable ([12] of DW1) > + * - Depth Cache Flush Enable ([0] of DW1) > + * - Stall at Pixel Scoreboard ([1] of DW1) > + * - Depth Stall ([13] of DW1) > + * - Post-Sync Operation ([13] of DW1) > + * - DC Flush Enable ([5] of DW1)" > + * > + * If we don't already have one of those bits set, we choose to add > + * "Stall at Pixel Scoreboard". Some of the other bits require a > + * CS stall as a workaround (see above), which would send us into > + * an infinite recursion of PIPE_CONTROLs. "Stall at Pixel Scoreboard" > + * appears to be safe, so we choose that. > + */ > + const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | > + PIPE_CONTROL_DEPTH_CACHE_FLUSH | > + PIPE_CONTROL_WRITE_IMMEDIATE | > + PIPE_CONTROL_WRITE_DEPTH_COUNT | > + PIPE_CONTROL_WRITE_TIMESTAMP | > + PIPE_CONTROL_STALL_AT_SCOREBOARD | > + PIPE_CONTROL_DEPTH_STALL | > + PIPE_CONTROL_DATA_CACHE_FLUSH; > + if (!(flags & wa_bits)) > + flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; > + } > + > + /* Emit --------------------------------------------------------------- */ > + > brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) { > #if GEN_GEN >= 9 > pc.FlushLLC = 0; > -- > 2.19.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev