Quoting Samuel Pitoiset (2018-07-13 03:30:01) > > > On 07/13/2018 12:05 PM, Samuel Pitoiset wrote: > > > > > > On 07/12/2018 09:43 PM, Dylan Baker wrote: > >> Quoting Samuel Pitoiset (2018-07-11 02:55:55) > >>> A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion > >>> counters) must immediately precede every timestamp event to > >>> prevent a GPU hang on GFX9. > >>> > >>> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > >>> Cc: 18.1 <mesa-sta...@lists.freedesktop.org> > >>> --- > >>> src/amd/vulkan/radv_cmd_buffer.c | 15 +++++++++++++-- > >>> src/amd/vulkan/radv_device.c | 4 ++-- > >>> src/amd/vulkan/radv_private.h | 7 +++++-- > >>> src/amd/vulkan/radv_query.c | 9 ++++++--- > >>> src/amd/vulkan/si_cmd_buffer.c | 26 +++++++++++++++++++++----- > >>> 5 files changed, 47 insertions(+), 14 deletions(-) > >>> > >>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c > >>> b/src/amd/vulkan/radv_cmd_buffer.c > >>> index 9da42fe03e..325e1993f8 100644 > >>> --- a/src/amd/vulkan/radv_cmd_buffer.c > >>> +++ b/src/amd/vulkan/radv_cmd_buffer.c > >>> @@ -319,11 +319,21 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer > >>> *cmd_buffer) > >>> } > >>> if (cmd_buffer->device->physical_device->rad_info.chip_class > >>> >= GFX9) { > >>> + unsigned num_db = > >>> cmd_buffer->device->physical_device->rad_info.num_render_backends; > >>> + unsigned eop_bug_offset; > >>> void *fence_ptr; > >>> + > >>> radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0, > >>> > >>> &cmd_buffer->gfx9_fence_offset, > >>> &fence_ptr); > >>> cmd_buffer->gfx9_fence_bo = > >>> cmd_buffer->upload.upload_bo; > >>> + > >>> + /* Allocate a buffer for the EOP bug on GFX9. 
*/ > >>> + radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0, > >>> + &eop_bug_offset, > >>> &fence_ptr); > >>> + cmd_buffer->gfx9_eop_bug_va = > >>> + > >>> radv_buffer_get_va(cmd_buffer->upload.upload_bo); > >>> + cmd_buffer->gfx9_eop_bug_va += eop_bug_offset; > >>> } > >>> cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL; > >>> @@ -473,7 +483,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer > >>> *cmd_buffer, > >>> > >>> cmd_buffer->device->physical_device->rad_info.chip_class, > >>> ptr, va, > >>> > >>> radv_cmd_buffer_uses_mec(cmd_buffer), > >>> - flags); > >>> + flags, > >>> cmd_buffer->gfx9_eop_bug_va); > >>> } > >>> if (unlikely(cmd_buffer->device->trace_bo)) > >>> @@ -4318,7 +4328,8 @@ static void write_event(struct radv_cmd_buffer > >>> *cmd_buffer, > >>> > >>> cmd_buffer->device->physical_device->rad_info.chip_class, > >>> > >>> radv_cmd_buffer_uses_mec(cmd_buffer), > >>> > >>> V_028A90_BOTTOM_OF_PIPE_TS, 0, > >>> - EOP_DATA_SEL_VALUE_32BIT, > >>> va, 2, value); > >>> + EOP_DATA_SEL_VALUE_32BIT, > >>> va, 2, value, > >>> + cmd_buffer->gfx9_eop_bug_va); > >>> } > >>> assert(cmd_buffer->cs->cdw <= cdw_max); > >>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c > >>> index 73c48cef1f..1c0a50c82f 100644 > >>> --- a/src/amd/vulkan/radv_device.c > >>> +++ b/src/amd/vulkan/radv_device.c > >>> @@ -2240,7 +2240,7 @@ radv_get_preamble_cs(struct radv_queue *queue, > >>> > >>> RADV_CMD_FLAG_INV_SMEM_L1 | > >>> > >>> RADV_CMD_FLAG_INV_VMEM_L1 | > >>> > >>> RADV_CMD_FLAG_INV_GLOBAL_L2 | > >>> - > >>> RADV_CMD_FLAG_START_PIPELINE_STATS); > >>> + > >>> RADV_CMD_FLAG_START_PIPELINE_STATS, 0); > >>> } else if (i == 1) { > >>> si_cs_emit_cache_flush(cs, > >>> > >>> queue->device->physical_device->rad_info.chip_class, > >>> @@ -2251,7 +2251,7 @@ radv_get_preamble_cs(struct radv_queue *queue, > >>> > >>> RADV_CMD_FLAG_INV_SMEM_L1 | > >>> > >>> RADV_CMD_FLAG_INV_VMEM_L1 | > >>> > >>> RADV_CMD_FLAG_INV_GLOBAL_L2 | > >>> - > >>> 
RADV_CMD_FLAG_START_PIPELINE_STATS); > >>> + > >>> RADV_CMD_FLAG_START_PIPELINE_STATS, 0); > >>> } > >>> if (!queue->device->ws->cs_finalize(cs)) > >>> diff --git a/src/amd/vulkan/radv_private.h > >>> b/src/amd/vulkan/radv_private.h > >>> index 4e4b3a6037..96218f4be2 100644 > >>> --- a/src/amd/vulkan/radv_private.h > >>> +++ b/src/amd/vulkan/radv_private.h > >>> @@ -1041,6 +1041,7 @@ struct radv_cmd_buffer { > >>> uint32_t gfx9_fence_offset; > >>> struct radeon_winsys_bo *gfx9_fence_bo; > >>> uint32_t gfx9_fence_idx; > >>> + uint64_t gfx9_eop_bug_va; > >>> /** > >>> * Whether a query pool has been resetted and we have to > >>> flush caches. > >>> @@ -1072,7 +1073,8 @@ void si_cs_emit_write_event_eop(struct > >>> radeon_cmdbuf *cs, > >>> unsigned data_sel, > >>> uint64_t va, > >>> uint32_t old_fence, > >>> - uint32_t new_fence); > >>> + uint32_t new_fence, > >>> + uint64_t gfx9_eop_bug_va); > >>> void si_emit_wait_fence(struct radeon_cmdbuf *cs, > >>> uint64_t va, uint32_t ref, > >>> @@ -1081,7 +1083,8 @@ void si_cs_emit_cache_flush(struct > >>> radeon_cmdbuf *cs, > >>> enum chip_class chip_class, > >>> uint32_t *fence_ptr, uint64_t va, > >>> bool is_mec, > >>> - enum radv_cmd_flush_bits flush_bits); > >>> + enum radv_cmd_flush_bits flush_bits, > >>> + uint64_t gfx9_eop_bug_va); > >>> void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer); > >>> void si_emit_set_predication_state(struct radv_cmd_buffer > >>> *cmd_buffer, uint64_t va); > >>> void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, > >>> diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c > >>> index 267d45890e..ba3783905f 100644 > >>> --- a/src/amd/vulkan/radv_query.c > >>> +++ b/src/amd/vulkan/radv_query.c > >>> @@ -1180,7 +1180,8 @@ static void emit_end_query(struct > >>> radv_cmd_buffer *cmd_buffer, > >>> > >>> radv_cmd_buffer_uses_mec(cmd_buffer), > >>> > >>> V_028A90_BOTTOM_OF_PIPE_TS, 0, > >>> EOP_DATA_SEL_VALUE_32BIT, > >>> - avail_va, 0, 1); > >>> + avail_va, 0, 
1, > >>> + cmd_buffer->gfx9_eop_bug_va); > >>> break; > >>> default: > >>> unreachable("ending unhandled query type"); > >>> @@ -1303,13 +1304,15 @@ void radv_CmdWriteTimestamp( > >>> mec, > >>> > >>> V_028A90_BOTTOM_OF_PIPE_TS, 0, > >>> > >>> EOP_DATA_SEL_TIMESTAMP, > >>> - query_va, 0, 0); > >>> + query_va, 0, 0, > >>> + > >>> cmd_buffer->gfx9_eop_bug_va); > >>> si_cs_emit_write_event_eop(cs, > >>> > >>> cmd_buffer->device->physical_device->rad_info.chip_class, > >>> mec, > >>> > >>> V_028A90_BOTTOM_OF_PIPE_TS, 0, > >>> > >>> EOP_DATA_SEL_VALUE_32BIT, > >>> - avail_va, 0, 1); > >>> + avail_va, 0, 1, > >>> + > >>> cmd_buffer->gfx9_eop_bug_va); > >>> break; > >>> } > >>> query_va += pool->stride; > >>> diff --git a/src/amd/vulkan/si_cmd_buffer.c > >>> b/src/amd/vulkan/si_cmd_buffer.c > >>> index 454fd8c39c..49c5eb1372 100644 > >>> --- a/src/amd/vulkan/si_cmd_buffer.c > >>> +++ b/src/amd/vulkan/si_cmd_buffer.c > >>> @@ -679,7 +679,8 @@ void si_cs_emit_write_event_eop(struct > >>> radeon_cmdbuf *cs, > >>> unsigned data_sel, > >>> uint64_t va, > >>> uint32_t old_fence, > >>> - uint32_t new_fence) > >>> + uint32_t new_fence, > >>> + uint64_t gfx9_eop_bug_va) > >>> { > >>> unsigned op = EVENT_TYPE(event) | > >>> EVENT_INDEX(5) | > >>> @@ -693,6 +694,17 @@ void si_cs_emit_write_event_eop(struct > >>> radeon_cmdbuf *cs, > >>> sel |= > >>> EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM); > >>> if (chip_class >= GFX9 || is_gfx8_mec) { > >>> + /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB > >>> occlusion > >>> + * counters) must immediately precede every timestamp > >>> event to > >>> + * prevent a GPU hang on GFX9. > >>> + */ > >>> + if (chip_class == GFX9) { > >>> + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); > >>> + radeon_emit(cs, > >>> EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); > >>> + radeon_emit(cs, gfx9_eop_bug_va); > >>> + radeon_emit(cs, gfx9_eop_bug_va >> 32); > >>> + } > >>> + > >>> radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 
> >>> 5 : 6, false)); > >>> radeon_emit(cs, op); > >>> radeon_emit(cs, sel); > >>> @@ -772,7 +784,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, > >>> uint32_t *flush_cnt, > >>> uint64_t flush_va, > >>> bool is_mec, > >>> - enum radv_cmd_flush_bits flush_bits) > >>> + enum radv_cmd_flush_bits flush_bits, > >>> + uint64_t gfx9_eop_bug_va) > >>> { > >>> unsigned cp_coher_cntl = 0; > >>> uint32_t flush_cb_db = flush_bits & > >>> (RADV_CMD_FLAG_FLUSH_AND_INV_CB | > >>> @@ -803,7 +816,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, > >>> > >>> V_028A90_FLUSH_AND_INV_CB_DATA_TS, > >>> 0, > >>> > >>> EOP_DATA_SEL_DISCARD, > >>> - 0, 0, 0); > >>> + 0, 0, 0, > >>> + > >>> gfx9_eop_bug_va); > >>> } > >>> } > >>> if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) { > >>> @@ -873,7 +887,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, > >>> si_cs_emit_write_event_eop(cs, chip_class, false, > >>> cb_db_event, tc_flags, > >>> EOP_DATA_SEL_VALUE_32BIT, > >>> - flush_va, old_fence, > >>> *flush_cnt); > >>> + flush_va, old_fence, > >>> *flush_cnt, > >>> + gfx9_eop_bug_va); > >>> si_emit_wait_fence(cs, flush_va, *flush_cnt, > >>> 0xffffffff); > >>> } > >>> @@ -975,7 +990,8 @@ si_emit_cache_flush(struct radv_cmd_buffer > >>> *cmd_buffer) > >>> > >>> cmd_buffer->device->physical_device->rad_info.chip_class, > >>> ptr, va, > >>> radv_cmd_buffer_uses_mec(cmd_buffer), > >>> - cmd_buffer->state.flush_bits); > >>> + cmd_buffer->state.flush_bits, > >>> + cmd_buffer->gfx9_eop_bug_va); > >>> if (unlikely(cmd_buffer->device->trace_bo)) > >>> -- > >>> 2.18.0 > >>> > >>> _______________________________________________ > >>> mesa-stable mailing list > >>> mesa-sta...@lists.freedesktop.org > >>> https://lists.freedesktop.org/mailman/listinfo/mesa-stable > >> > >> Hi Samuel, > >> > >> This patch also doesn't apply cleanly to 18.1. Could I bother you for a > >> backport? > > > > Will do. > > Dylan, I presume the backport should be based on the staging/18.1 branch? >
Yes, please.
signature.asc
Description: signature
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev