I've commented on some of the patches, but the series is:

Reviewed-by: Marek Olšák <marek.ol...@amd.com>
I'd still like to have the preamble CE IB (explained in 00/13) implemented before this lands, so that context flushes are reasonably fast. Only the closed GL driver has been using it so far. Marek On Thu, Apr 14, 2016 at 3:35 AM, Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> wrote: > Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> > --- > src/gallium/drivers/radeonsi/si_descriptors.c | 46 > +++++++++++++++++++++------ > 1 file changed, 36 insertions(+), 10 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c > b/src/gallium/drivers/radeonsi/si_descriptors.c > index 5e26760..5ddb168 100644 > --- a/src/gallium/drivers/radeonsi/si_descriptors.c > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c > @@ -60,6 +60,7 @@ > #include "si_shader.h" > #include "sid.h" > > +#include "util/u_math.h" > #include "util/u_memory.h" > #include "util/u_suballoc.h" > #include "util/u_upload_mgr.h" > @@ -104,7 +105,10 @@ static void si_init_descriptors(struct si_descriptors > *desc, > { > int i; > > - assert(num_elements <= sizeof(desc->enabled_mask)*8); > + /* Ensure that desc->enabled_mask covers all descriptors. The + 1 is > + * to ensure that u_bit_scan_consecutive_range64 never shifts the 1 > + * out of the variable while creating the clear mask. 
*/ > + assert(num_elements + 1 <= sizeof(desc->enabled_mask) * CHAR_BIT); > > desc->list = CALLOC(num_elements, element_dw_size * 4); > desc->element_dw_size = element_dw_size; > @@ -157,24 +161,46 @@ static bool si_upload_descriptors(struct si_context > *sctx, > struct si_descriptors *desc) > { > unsigned list_size = desc->num_elements * desc->element_dw_size * 4; > - void *ptr; > > if (!desc->list_dirty) > return true; > > - u_upload_alloc(sctx->b.uploader, 0, list_size, 256, > - &desc->buffer_offset, > - (struct pipe_resource**)&desc->buffer, &ptr); > - if (!desc->buffer) > - return false; /* skip the draw call */ > + if (sctx->ce_ib) { > + uint32_t const* list = (uint32_t const*)desc->list; > > - util_memcpy_cpu_to_le32(ptr, desc->list, list_size); > + while(desc->dirty_mask) { > + int begin, count; > + u_bit_scan_consecutive_range64(&desc->dirty_mask, > &begin, > + &count); > > - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, > - RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); > + begin *= desc->element_dw_size; > + count *= desc->element_dw_size; > + > + radeon_emit(sctx->ce_ib, > + PKT3(PKT3_WRITE_CONST_RAM, count, 0)); > + radeon_emit(sctx->ce_ib, desc->ce_offset + begin * 4); > + radeon_emit_array(sctx->ce_ib, list + begin, count); > + } > + > + if(!si_ce_upload(sctx, desc->ce_offset, list_size, > + &desc->buffer_offset, &desc->buffer)) > + return false; > + } else { > + void *ptr; > + > + u_upload_alloc(sctx->b.uploader, 0, list_size, 256, > + &desc->buffer_offset, > + (struct pipe_resource**)&desc->buffer, &ptr); > + if (!desc->buffer) > + return false; /* skip the draw call */ > > + util_memcpy_cpu_to_le32(ptr, desc->list, list_size); > + } > + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, > + RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); > desc->list_dirty = false; > desc->pointer_dirty = true; > + desc->dirty_mask = 0; > si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom); > return true; > } > -- > 2.8.0 > > 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev