We can then upload only the dirty ones with the constant engine.

Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 37 ++++++++++++++++-----------
 src/gallium/drivers/radeonsi/si_state.h       |  9 +++++--
 2 files changed, 29 insertions(+), 17 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 0b44ecf..8ca0253 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -109,7 +109,7 @@ static void si_init_descriptors(struct si_descriptors *desc, desc->list = CALLOC(num_elements, element_dw_size * 4); desc->element_dw_size = element_dw_size; desc->num_elements = num_elements; - desc->list_dirty = true; /* upload the list before the next draw */ + desc->dirty_mask = num_elements == 64 ? ~0llu : (1llu << num_elements) - 1; desc->shader_userdata_offset = shader_userdata_index * 4; desc->ce_offset = *ce_offset; @@ -159,7 +159,7 @@ static bool si_upload_descriptors(struct si_context *sctx, unsigned list_size = desc->num_elements * desc->element_dw_size * 4; void *ptr; - if (!desc->list_dirty) + if (!desc->dirty_mask) return true; u_upload_alloc(sctx->b.uploader, 0, list_size, 256, @@ -173,7 +173,7 @@ static bool si_upload_descriptors(struct si_context *sctx, radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); - desc->list_dirty = false; + desc->dirty_mask = 0; desc->pointer_dirty = true; si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom); return true; @@ -216,6 +216,8 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx, si_sampler_view_add_buffer(sctx, views->views[i]->texture); } + views->desc.ce_ram_dirty = true; + if (!views->desc.buffer) return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, views->desc.buffer, @@ -267,7 +269,7 @@ static void si_set_sampler_view(struct si_context *sctx, views->desc.enabled_mask &= ~(1llu << slot); } - views->desc.list_dirty = true; + views->desc.dirty_mask |= 1llu << slot; } static bool is_compressed_colortex(struct r600_texture *rtex) @@ -373,6 +375,8 @@ si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *imag si_sampler_view_add_buffer(sctx, 
view->resource); } + images->desc.ce_ram_dirty = true; + if (images->desc.buffer) { radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, images->desc.buffer, @@ -390,7 +394,7 @@ si_disable_shader_image(struct si_images_info *images, unsigned slot) memcpy(images->desc.list + slot*8, null_image_descriptor, 8*4); images->desc.enabled_mask &= ~(1llu << slot); - images->desc.list_dirty = true; + images->desc.dirty_mask |= 1llu << slot; } } @@ -471,7 +475,7 @@ si_set_shader_images(struct pipe_context *pipe, unsigned shader, } images->desc.enabled_mask |= 1llu << slot; - images->desc.list_dirty = true; + images->desc.dirty_mask |= 1llu << slot; } } @@ -529,7 +533,7 @@ static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, continue; memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4); - desc->list_dirty = true; + desc->dirty_mask |= 1llu << slot; } } @@ -576,6 +580,8 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx, buffers->shader_usage, buffers->priority); } + buffers->desc.ce_ram_dirty = true; + if (!buffers->desc.buffer) return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, @@ -772,7 +778,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s buffers->desc.enabled_mask &= ~(1llu << slot); } - buffers->desc.list_dirty = true; + buffers->desc.dirty_mask |= 1llu << slot; } /* SHADER BUFFERS */ @@ -819,9 +825,9 @@ static void si_set_shader_buffers(struct pipe_context *ctx, unsigned shader, radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, buf, buffers->shader_usage, buffers->priority); buffers->desc.enabled_mask |= 1llu << slot; + buffers->desc.dirty_mask |= 1llu << slot; } - buffers->desc.list_dirty = true; } /* RING BUFFERS */ @@ -916,7 +922,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot, buffers->desc.enabled_mask &= ~(1llu << slot); } - buffers->desc.list_dirty = true; + buffers->desc.dirty_mask |= 1llu << slot; } /* STREAMOUT BUFFERS */ @@ -1014,6 +1020,7 
@@ static void si_set_streamout_targets(struct pipe_context *ctx, NULL); buffers->desc.enabled_mask &= ~(1llu << bufidx); } + buffers->desc.dirty_mask |= 1llu << bufidx; } for (; i < old_num_targets; i++) { bufidx = SI_SO_BUF_OFFSET + i; @@ -1021,9 +1028,9 @@ static void si_set_streamout_targets(struct pipe_context *ctx, memset(buffers->desc.list + bufidx*4, 0, sizeof(uint32_t) * 4); pipe_resource_reference(&buffers->buffers[bufidx], NULL); buffers->desc.enabled_mask &= ~(1llu << bufidx); + buffers->desc.dirty_mask |= 1llu << bufidx; } - buffers->desc.list_dirty = true; } static void si_desc_reset_buffer_offset(struct pipe_context *ctx, @@ -1075,7 +1082,7 @@ static void si_reset_buffer_resources(struct si_context *sctx, si_desc_reset_buffer_offset(&sctx->b.b, buffers->desc.list + i*4, old_va, buf); - buffers->desc.list_dirty = true; + buffers->desc.dirty_mask |= 1llu << i; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, (struct r600_resource *)buf, @@ -1137,7 +1144,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource if (buffers->buffers[i] == buf) { si_desc_reset_buffer_offset(ctx, buffers->desc.list + i*4, old_va, buf); - buffers->desc.list_dirty = true; + buffers->desc.dirty_mask |= 1llu << i; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, rbuffer, buffers->shader_usage, @@ -1182,7 +1189,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource views->desc.list + i * 16 + 4, old_va, buf); - views->desc.list_dirty = true; + views->desc.dirty_mask |= 1llu << i; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, rbuffer, RADEON_USAGE_READ, @@ -1203,7 +1210,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource si_desc_reset_buffer_offset( ctx, images->desc.list + i * 8 + 4, old_va, buf); - images->desc.list_dirty = true; + images->desc.dirty_mask |= 1llu << i; radeon_add_to_buffer_list( &sctx->b, &sctx->b.gfx, rbuffer, diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h index fbdc8ee..a0ae72e 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -191,8 +191,6 @@ struct si_descriptors { unsigned element_dw_size; /* The maximum number of descriptors. */ unsigned num_elements; - /* Whether the list has been changed and should be re-uploaded. */ - bool list_dirty; /* The buffer where the descriptors have been uploaded. */ struct r600_resource *buffer; @@ -204,6 +202,13 @@ struct si_descriptors { /* The i-th bit is set if that element is enabled (non-NULL resource). */ uint64_t enabled_mask; + /* elements of the list that are changed and need to be uploaded */ + uint64_t dirty_mask; + + /* Whether the CE ram is dirty and needs to be reinitialized entirely + * before we can do partial updates. */ + bool ce_ram_dirty; + /* The shader userdata offset within a shader where the 64-bit pointer to the descriptor * array will be stored. */ unsigned shader_userdata_offset; -- 2.8.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev