Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
On Sun, Feb 12, 2017, at 20:11, Dave Airlie wrote: > From: Dave Airlie <airl...@redhat.com> > > If a buffer is just full of flushes we flush things on command > buffer submission, so don't bother submitting these. > > This will reduce some CPU overhead on dota2, which submits a fair > few command streams that don't end up drawing anything. > > v2: reorganise loop to count first then malloc, > rename some vars (Bas) > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/amd/vulkan/radv_cmd_buffer.c | 3 +++ > src/amd/vulkan/radv_device.c | 27 ++++++++++++++++++++------- > src/amd/vulkan/radv_meta_buffer.c | 1 + > src/amd/vulkan/radv_private.h | 2 ++ > src/amd/vulkan/si_cmd_buffer.c | 2 +- > 5 files changed, 27 insertions(+), 8 deletions(-) > > diff --git a/src/amd/vulkan/radv_cmd_buffer.c > b/src/amd/vulkan/radv_cmd_buffer.c > index f281f33..25b1bd6 100644 > --- a/src/amd/vulkan/radv_cmd_buffer.c > +++ b/src/amd/vulkan/radv_cmd_buffer.c > @@ -1277,6 +1277,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer > *cmd_buffer) > MAYBE_UNUSED unsigned cdw_max = > radeon_check_space(cmd_buffer->device->ws, > cmd_buffer->cs, > 4096); > > + cmd_buffer->no_draws = false; > if ((cmd_buffer->state.vertex_descriptors_dirty || > cmd_buffer->state.vb_dirty) && > cmd_buffer->state.pipeline->num_vertex_attribs) { > unsigned vb_offset; > @@ -1592,6 +1593,7 @@ static void radv_reset_cmd_buffer(struct > radv_cmd_buffer *cmd_buffer) > cmd_buffer->record_fail = false; > > cmd_buffer->ring_offsets_idx = -1; > + cmd_buffer->no_draws = true; > } > > VkResult radv_ResetCommandBuffer( > @@ -2423,6 +2425,7 @@ void radv_CmdDrawIndexedIndirectCountAMD( > static void > radv_flush_compute_state(struct radv_cmd_buffer *cmd_buffer) > { > + cmd_buffer->no_draws = false; > radv_emit_compute_pipeline(cmd_buffer); > radv_flush_descriptors(cmd_buffer, cmd_buffer->state.compute_pipeline, > VK_SHADER_STAGE_COMPUTE_BIT); > diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c > index 8a54a2a..38848f9 100644 > --- a/src/amd/vulkan/radv_device.c > +++ b/src/amd/vulkan/radv_device.c > @@ -1425,8 +1425,18 @@ VkResult radv_QueueSubmit( > struct radeon_winsys_cs **cs_array; > bool can_patch = true; > uint32_t advance; > + int draw_cmd_buffers_count = 0; > > - if (!pSubmits[i].commandBufferCount) { > + for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; > j++) { > + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, > + pSubmits[i].pCommandBuffers[j]); > + assert(cmd_buffer->level == > VK_COMMAND_BUFFER_LEVEL_PRIMARY); > + if (cmd_buffer->no_draws == true) > + continue; > + draw_cmd_buffers_count++; > + } > + > + if (!draw_cmd_buffers_count) { > if (pSubmits[i].waitSemaphoreCount || > pSubmits[i].signalSemaphoreCount) { > ret = queue->device->ws->cs_submit(ctx, > queue->queue_idx, > > &queue->device->empty_cs[queue->queue_family_index], > @@ -1445,24 +1455,27 @@ VkResult radv_QueueSubmit( > continue; > } > > - cs_array = malloc(sizeof(struct radeon_winsys_cs *) * > - > pSubmits[i].commandBufferCount); > + cs_array = malloc(sizeof(struct radeon_winsys_cs *) * > draw_cmd_buffers_count); > > + int draw_cmd_buffer_idx = 0; > for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { > RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, > pSubmits[i].pCommandBuffers[j]); > assert(cmd_buffer->level == > VK_COMMAND_BUFFER_LEVEL_PRIMARY); > + if (cmd_buffer->no_draws == true) > + continue; > > - cs_array[j] = cmd_buffer->cs; > + cs_array[draw_cmd_buffer_idx] = cmd_buffer->cs; > + draw_cmd_buffer_idx++; > if ((cmd_buffer->usage_flags & > VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) > can_patch = false; > } > > - for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; > j += advance) { > + for (uint32_t j = 0; j < draw_cmd_buffers_count; j += > advance) { > advance = MIN2(max_cs_submission, > - pSubmits[i].commandBufferCount - > j); > + draw_cmd_buffers_count - j); > bool b = j == 0; > - bool e = j + advance == > pSubmits[i].commandBufferCount; > + bool e = j + advance == draw_cmd_buffers_count; > > if (queue->device->trace_bo) > *queue->device->trace_id_ptr = 0; > diff --git a/src/amd/vulkan/radv_meta_buffer.c > b/src/amd/vulkan/radv_meta_buffer.c > index cd2973f..4857d3d 100644 > --- a/src/amd/vulkan/radv_meta_buffer.c > +++ b/src/amd/vulkan/radv_meta_buffer.c > @@ -523,6 +523,7 @@ void radv_CmdUpdateBuffer( > assert(!(dataSize & 3)); > assert(!(va & 3)); > > + cmd_buffer->no_draws = false; > if (dataSize < 4096) { > cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, > dst_buffer->bo, 8); > > diff --git a/src/amd/vulkan/radv_private.h > b/src/amd/vulkan/radv_private.h > index 25ed5de..9a88ce0 100644 > --- a/src/amd/vulkan/radv_private.h > +++ b/src/amd/vulkan/radv_private.h > @@ -750,6 +750,8 @@ struct radv_cmd_buffer { > uint32_t gsvs_ring_size_needed; > > int ring_offsets_idx; /* just used for verification */ > + > + bool no_draws; > }; > > struct radv_image; > diff --git a/src/amd/vulkan/si_cmd_buffer.c > b/src/amd/vulkan/si_cmd_buffer.c > index 1c99b22..b94c1f1 100644 > --- a/src/amd/vulkan/si_cmd_buffer.c > +++ b/src/amd/vulkan/si_cmd_buffer.c > @@ -828,7 +828,7 @@ static void si_emit_cp_dma_clear_buffer(struct > radv_cmd_buffer *cmd_buffer, > static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, > uint64_t byte_count, > uint64_t remaining_size, unsigned *flags) > { > - > + cmd_buffer->no_draws = false; > /* Flush the caches for the first copy only. > * Also wait for the previous CP DMA operations. > */ > -- > 2.7.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev