On Tuesday, July 17, 2018 9:40:29 AM PDT Lionel Landwerlin wrote:
> On 11/07/18 19:25, Kenneth Graunke wrote:
> > Normally, i965 programs STATE_BASE_ADDRESS every batch, and puts all
> > state for a given base in a single buffer.
> >
> > I'm working on a prototype which emits STATE_BASE_ADDRESS only once at
> > startup, where each base address is a fixed 4GB region of the PPGTT.
> > State may live in many buffers in that 4GB region, even if there isn't
> > a buffer located at the actual base address itself.
> >
> > To handle this, we need to save the STATE_BASE_ADDRESS values across
> > multiple batches, rather than assuming we'll see the command each time.
> > Then, each time we see a pointer, we need to ask the driver for the BO
> > map for that data.  (We can't just use the map for the base address, as
> > state may be in multiple buffers, and there may not even be a buffer
> > at the base address to map.)
> > ---
> >   src/intel/common/gen_batch_decoder.c | 83 ++++++++++++++++------------
> >   src/intel/common/gen_decoder.h       |  9 ++-
> >   2 files changed, 56 insertions(+), 36 deletions(-)
> >
> > diff --git a/src/intel/common/gen_batch_decoder.c 
> > b/src/intel/common/gen_batch_decoder.c
> > index fe7536da9ec..6cb66bcb257 100644
> > --- a/src/intel/common/gen_batch_decoder.c
> > +++ b/src/intel/common/gen_batch_decoder.c
> > @@ -128,13 +128,13 @@ static void
> >   ctx_disassemble_program(struct gen_batch_decode_ctx *ctx,
> >                           uint32_t ksp, const char *type)
> >   {
> > -   if (!ctx->instruction_base.map)
> > +   uint64_t addr = ctx->instruction_base.addr + ksp;
> > +   struct gen_batch_decode_bo bo = ctx_get_bo(ctx, addr);
> > +   if (!bo.map)
> >         return;
> >   
> > -   printf("\nReferenced %s:\n", type);
> > -   gen_disasm_disassemble(ctx->disasm,
> > -                          (void *)ctx->instruction_base.map, ksp,
> > -                          ctx->fp);
> > +   fprintf(ctx->fp, "\nReferenced %s:\n", type);
> > +   gen_disasm_disassemble(ctx->disasm, bo.map, 0, ctx->fp);
> >   }
> >   
> >   /* Heuristic to determine whether a uint32_t is probably actually a float
> > @@ -225,35 +225,30 @@ dump_binding_table(struct gen_batch_decode_ctx *ctx, 
> > uint32_t offset, int count)
> >      if (count < 0)
> >         count = update_count(ctx, offset, 1, 8);
> >   
> > -   if (ctx->surface_base.map == NULL) {
> > +   struct gen_batch_decode_bo bind_bo =
> > +      ctx_get_bo(ctx, ctx->surface_base.addr + offset);
> > +
> > +   if (bind_bo.map == NULL) {
> >         fprintf(ctx->fp, "  binding table unavailable\n");
> >         return;
> >      }
> >   
> > -   if (offset % 32 != 0 || offset >= UINT16_MAX ||
> > -       offset >= ctx->surface_base.size) {
> > +   if (offset % 32 != 0 || offset >= UINT16_MAX || offset >= bind_bo.size) 
> > {
> 
> I wonder if this
> 
> offset >= bind_bo.size
> 
> is right. That's assuming bind_bo.addr == ctx->surface_base.addr, but in 
> your prototype it probably won't be, right?
> 
> 
> I would check (ctx->surface_base.addr + offset) >= (bind_bo.addr + 
> bind_bo.size)

Yeah, I agree, this is wrong.  I think we should just drop the
offset >= bind_bo.size check.  We're already doing ctx_get_bo()
at surface_base.addr + offset...and since we've checked bind_bo.map
isn't NULL...we know the start of the binding table is in the BO.

I'll also move these checks above the ctx_get_bo() call, so we validate
the alignment and maximum amount restrictions from the packet before
using it to look up anything.

> >         fprintf(ctx->fp, "  invalid binding table pointer\n");
> >         return;
> >      }
> >   
> > -   struct gen_batch_decode_bo bo = ctx->surface_base;
> > -   const uint32_t *pointers = ctx->surface_base.map + offset;
> > +   const uint32_t *pointers = bind_bo.map;
> >      for (int i = 0; i < count; i++) {
> >         if (pointers[i] == 0)
> >            continue;
> >   
> > -      if (pointers[i] % 32 != 0) {
> > -         fprintf(ctx->fp, "pointer %u: %08x <not valid>\n", i, 
> > pointers[i]);
> > -         continue;
> > -      }
> > -
> >         uint64_t addr = ctx->surface_base.addr + pointers[i];
> > +      struct gen_batch_decode_bo bo = ctx_get_bo(ctx, addr);
> >         uint32_t size = strct->dw_length * 4;
> >   
> > -      if (addr < bo.addr || addr + size >= bo.addr + bo.size)
> > -         bo = ctx->get_bo(ctx->user_data, addr);
> > -
> > -      if (addr < bo.addr || addr + size >= bo.addr + bo.size) {
> > +      if (pointers[i] % 32 != 0 ||
> > +          addr < bo.addr || addr + size >= bo.addr + bo.size) {
> >            fprintf(ctx->fp, "pointer %u: %08x <not valid>\n", i, 
> > pointers[i]);
> >            continue;
> >         }
> > @@ -271,18 +266,20 @@ dump_samplers(struct gen_batch_decode_ctx *ctx, 
> > uint32_t offset, int count)
> >      if (count < 0)
> >         count = update_count(ctx, offset, strct->dw_length, 4);
> >   
> > -   if (ctx->dynamic_base.map == NULL) {
> > +   uint64_t state_addr = ctx->dynamic_base.addr + offset;
> > +   struct gen_batch_decode_bo bo = ctx_get_bo(ctx, state_addr);
> > +   const void *state_map = bo.map;
> > +
> > +   if (state_map == NULL) {
> >         fprintf(ctx->fp, "  samplers unavailable\n");
> >         return;
> >      }
> >   
> > -   if (offset % 32 != 0 || offset >= ctx->dynamic_base.size) {
> > +   if (offset % 32 != 0 || state_addr - bo.addr >= bo.size) {
> >         fprintf(ctx->fp, "  invalid sampler state pointer\n");
> >         return;
> >      }
> >   
> > -   uint64_t state_addr = ctx->dynamic_base.addr + offset;
> > -   const void *state_map = ctx->dynamic_base.map + offset;
> >      for (int i = 0; i < count; i++) {
> >         fprintf(ctx->fp, "sampler state %d\n", i);
> >         ctx_print_group(ctx, strct, state_addr, state_map);
> > @@ -295,9 +292,6 @@ static void
> >   handle_media_interface_descriptor_load(struct gen_batch_decode_ctx *ctx,
> >                                          const uint32_t *p)
> >   {
> > -   if (ctx->dynamic_base.map == NULL)
> > -      return;
> > -
> >      struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> >      struct gen_group *desc =
> >         gen_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
> > @@ -316,7 +310,12 @@ handle_media_interface_descriptor_load(struct 
> > gen_batch_decode_ctx *ctx,
> >      }
> >   
> >      uint64_t desc_addr = ctx->dynamic_base.addr + descriptor_offset;
> > -   const uint32_t *desc_map = ctx->dynamic_base.map + descriptor_offset;
> > +   struct gen_batch_decode_bo bo = ctx_get_bo(ctx, desc_addr);
> > +   const void *desc_map = bo.map;
> > +
> > +   if (desc_map == NULL)
> > +      return;
> 
> Maybe a message here?
> 

Sure, I'll add

      fprintf(ctx->fp, "  interface descriptors unavailable\n");

and some braces.

> > +
> >      for (int i = 0; i < descriptor_count; i++) {
> >         fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
> >   
> > @@ -640,11 +639,6 @@ decode_dynamic_state_pointers(struct 
> > gen_batch_decode_ctx *ctx,
> >                                 const char *struct_type, const uint32_t *p,
> >                                 int count)
> >   {
> > -   if (ctx->dynamic_base.map == NULL) {
> > -      fprintf(ctx->fp, "  dynamic %s state unavailable\n", struct_type);
> > -      return;
> > -   }
> > -
> >      struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> >      struct gen_group *state = gen_spec_find_struct(ctx->spec, struct_type);
> >   
> > @@ -659,8 +653,15 @@ decode_dynamic_state_pointers(struct 
> > gen_batch_decode_ctx *ctx,
> >         }
> >      }
> >   
> > -   uint32_t state_addr = ctx->dynamic_base.addr + state_offset;
> > -   const uint32_t *state_map = ctx->dynamic_base.map + state_offset;
> > +   uint64_t state_addr = ctx->dynamic_base.addr + state_offset;
> > +   struct gen_batch_decode_bo bo = ctx_get_bo(ctx, state_addr);
> > +   const void *state_map = bo.map;
> > +
> > +   if (state_map == NULL) {
> > +      fprintf(ctx->fp, "  dynamic %s state unavailable\n", struct_type);
> > +      return;
> > +   }
> > +
> >      for (int i = 0; i < count; i++) {
> >         fprintf(ctx->fp, "%s %d\n", struct_type, i);
> >         ctx_print_group(ctx, state, state_offset, state_map);
> > @@ -787,6 +788,20 @@ gen_print_batch(struct gen_batch_decode_ctx *ctx,
> >      int length;
> >      struct gen_group *inst;
> >   
> > +   /* If we have an address for base addresses, but no map, try and fetch
> > +    * them again.  We might have a pre-programmed address but no buffer
> > +    * there (say, because no surfaces were in use the first time).
> > +    */
> > +   if (ctx->surface_base.map == NULL && ctx->surface_base.addr != 0) {
> > +      ctx->surface_base = ctx_get_bo(ctx, ctx->surface_base.addr);
> > +   }
> > +   if (ctx->dynamic_base.map == NULL && ctx->dynamic_base.addr != 0) {
> > +      ctx->dynamic_base = ctx_get_bo(ctx, ctx->dynamic_base.addr);
> > +   }
> > +   if (ctx->instruction_base.map == NULL && ctx->instruction_base.addr != 
> > 0) {
> > +      ctx->instruction_base = ctx_get_bo(ctx, ctx->instruction_base.addr);
> > +   }
> 
> Not really sure why you bother getting the BOs here.
> If you're going to get them for each address field we encounter in the 
> rest of the decoder, that's probably not necessary.

Yep.  Useless.  I'd added this originally because I wasn't getting
Surface State Base Address, and this sort-of fixed that
problem...because I happen to have a BO there.  But it wasn't remotely
complete or correct, so I fixed it properly...and now this isn't needed.

Dropped.  Thanks for the review!  I'll send a v2 shortly.

Attachment: signature.asc
Description: This is a digitally signed message part.

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to