This patch should not be so early in the series. It uses the execsize
information in the IR which is not available until patch 38, so it
should really go after that.

Iago

On Tue, 2016-07-19 at 12:40 +0200, Iago Toral Quiroga wrote:
> From: "Juan A. Suarez Romero" <jasua...@igalia.com>
> 
> Our current data flow analysis does not take into account that
> channels
> on 64-bit operands are 64-bit. This is a problem when the same
> register
> is accessed using both 64-bit and 32-bit channels. This is very
> common
> in operations where we need to access 64-bit data in 32-bit chunks,
> such as the double packing and packing operations.
> 
> This patch changes the analysis by checking the bits that each source
> or destination datatype needs. Actually, rather than bits, we use
> blocks of 32bits, which is the minimum channel size.
> 
> Because a vgrf can contain a dvec4 (256 bits), we reserve 8
> 32-bit blocks to map the channels.
> ---
>  .../dri/i965/brw_vec4_dead_code_eliminate.cpp      | 30
> +++++++++++++-----
>  .../drivers/dri/i965/brw_vec4_live_variables.cpp   | 36
> +++++++++++++++++-----
>  .../drivers/dri/i965/brw_vec4_live_variables.h     |  7 +++--
>  3 files changed, 55 insertions(+), 18 deletions(-)
> 
> diff --git
> a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
> index c643212..1f7cd49 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
> @@ -59,10 +59,16 @@ vec4_visitor::dead_code_eliminate()
>              bool result_live[4] = { false };
>  
>              if (inst->dst.file == VGRF) {
> +               const unsigned offs = type_sz(inst->dst.type) == 8 ?
> +                  1 : (inst->exec_size == 4 ? 0 : 4);
>                 for (unsigned i = 0; i < inst->regs_written; i++) {
> -                  for (int c = 0; c < 4; c++)
> -                     result_live[c] |= BITSET_TEST(
> -                        live, var_from_reg(alloc, offset(inst->dst,
> i), c));
> +                  for (int c = 0; c < 4; c++) {
> +                     const unsigned v =
> +                        var_from_reg(alloc, offset(inst->dst, i),
> c);
> +                     result_live[c] |= BITSET_TEST(live, v);
> +                     if (offs)
> +                        result_live[c] |= BITSET_TEST(live, v +
> offs);
> +                  }
>                 }
>              } else {
>                 for (unsigned c = 0; c < 4; c++)
> @@ -110,11 +116,16 @@ vec4_visitor::dead_code_eliminate()
>           }
>  
>           if (inst->dst.file == VGRF && !inst->predicate) {
> +            const unsigned offs = type_sz(inst->dst.type) == 8 ?
> +               1 : (inst->exec_size == 4 ? 0 : 4);
>              for (unsigned i = 0; i < inst->regs_written; i++) {
>                 for (int c = 0; c < 4; c++) {
>                    if (inst->dst.writemask & (1 << c)) {
> -                     BITSET_CLEAR(live, var_from_reg(alloc,
> -                                                     offset(inst-
> >dst, i), c));
> +                     const unsigned v =
> +                        var_from_reg(alloc, offset(inst->dst, i),
> c);
> +                     BITSET_CLEAR(live, v);
> +                     if (offs)
> +                        BITSET_CLEAR(live, v + offs);
>                    }
>                 }
>              }
> @@ -132,10 +143,15 @@ vec4_visitor::dead_code_eliminate()
>  
>           for (int i = 0; i < 3; i++) {
>              if (inst->src[i].file == VGRF) {
> +               const unsigned offs = type_sz(inst->src[i].type) == 8
> ?
> +                  1 : (inst->exec_size == 4 ? 0 : 4);
>                 for (unsigned j = 0; j < inst->regs_read(i); j++) {
>                    for (int c = 0; c < 4; c++) {
> -                     BITSET_SET(live, var_from_reg(alloc,
> -                                                   offset(inst-
> >src[i], j), c));
> +                     const unsigned v =
> +                        var_from_reg(alloc, offset(inst->src[i], j),
> c);
> +                     BITSET_SET(live, v);
> +                     if (offs)
> +                        BITSET_SET(live, v + offs);
>                    }
>                 }
>              }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> index 57d5fbb..bfc5e44 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> @@ -76,12 +76,16 @@ vec4_live_variables::setup_def_use()
>        /* Set use[] for this instruction */
>        for (unsigned int i = 0; i < 3; i++) {
>           if (inst->src[i].file == VGRF) {
> +               const unsigned offs = type_sz(inst->dst.type) == 8 ?
> +                  1 : (inst->exec_size == 4 ? 0 : 4);
>                 for (unsigned j = 0; j < inst->regs_read(i); j++) {
>                    for (int c = 0; c < 4; c++) {
>                       const unsigned v =
>                          var_from_reg(alloc, offset(inst->src[i], j),
> c);
>                       if (!BITSET_TEST(bd->def, v))
>                          BITSET_SET(bd->use, v);
> +                     if (offs && !BITSET_TEST(bd->def, v + offs))
> +                        BITSET_SET(bd->use, v + offs);
>                    }
>                 }
>           }
> @@ -99,6 +103,8 @@ vec4_live_variables::setup_def_use()
>         */
>        if (inst->dst.file == VGRF &&
>            (!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) {
> +            const unsigned offs = type_sz(inst->dst.type) == 8 ?
> +               1 : (inst->exec_size == 4 ? 0 : 4);
>              for (unsigned i = 0; i < inst->regs_written; i++) {
>                 for (int c = 0; c < 4; c++) {
>                    if (inst->dst.writemask & (1 << c)) {
> @@ -106,6 +112,8 @@ vec4_live_variables::setup_def_use()
>                          var_from_reg(alloc, offset(inst->dst, i),
> c);
>                       if (!BITSET_TEST(bd->use, v))
>                          BITSET_SET(bd->def, v);
> +                     if (offs && !BITSET_TEST(bd->use, v + offs))
> +                        BITSET_SET(bd->def, v + offs);
>                    }
>                 }
>              }
> @@ -188,7 +196,7 @@ vec4_live_variables::vec4_live_variables(const
> simple_allocator &alloc,
>  {
>     mem_ctx = ralloc_context(NULL);
>  
> -   num_vars = alloc.total_size * 4;
> +   num_vars = alloc.total_size * 8;
>     block_data = rzalloc_array(mem_ctx, struct block_data, cfg-
> >num_blocks);
>  
>     bitset_words = BITSET_WORDS(num_vars);
> @@ -238,14 +246,14 @@ vec4_visitor::calculate_live_intervals()
>     if (this->live_intervals)
>        return;
>  
> -   int *start = ralloc_array(mem_ctx, int, this->alloc.total_size *
> 4);
> -   int *end = ralloc_array(mem_ctx, int, this->alloc.total_size *
> 4);
> +   int *start = ralloc_array(mem_ctx, int, this->alloc.total_size *
> 8);
> +   int *end = ralloc_array(mem_ctx, int, this->alloc.total_size *
> 8);
>     ralloc_free(this->virtual_grf_start);
>     ralloc_free(this->virtual_grf_end);
>     this->virtual_grf_start = start;
>     this->virtual_grf_end = end;
>  
> -   for (unsigned i = 0; i < this->alloc.total_size * 4; i++) {
> +   for (unsigned i = 0; i < this->alloc.total_size * 8; i++) {
>        start[i] = MAX_INSTRUCTION;
>        end[i] = -1;
>     }
> @@ -257,18 +265,26 @@ vec4_visitor::calculate_live_intervals()
>     foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
>        for (unsigned int i = 0; i < 3; i++) {
>        if (inst->src[i].file == VGRF) {
> +            const unsigned offs = type_sz(inst->dst.type) == 8 ?
> +               1 : (inst->exec_size == 4 ? 0 : 4);
>              for (unsigned j = 0; j < inst->regs_read(i); j++) {
>                 for (int c = 0; c < 4; c++) {
>                    const unsigned v =
>                       var_from_reg(alloc, offset(inst->src[i], j),
> c);
>                    start[v] = MIN2(start[v], ip);
>                    end[v] = ip;
> +                  if (offs) {
> +                     start[v + offs] = MIN2(start[v + offs], ip);
> +                     end[v + offs] = ip;
> +                  }
>                 }
>              }
>        }
>        }
>  
>        if (inst->dst.file == VGRF) {
> +         const unsigned offs = type_sz(inst->dst.type) == 8 ?
> +            1 : (inst->exec_size == 4 ? 0 : 4);
>           for (unsigned i = 0; i < inst->regs_written; i++) {
>              for (int c = 0; c < 4; c++) {
>                 if (inst->dst.writemask & (1 << c)) {
> @@ -276,6 +292,10 @@ vec4_visitor::calculate_live_intervals()
>                       var_from_reg(alloc, offset(inst->dst, i), c);
>                    start[v] = MIN2(start[v], ip);
>                    end[v] = ip;
> +                  if (offs) {
> +                     start[v + offs] = MIN2(start[v + offs], ip);
> +                     end[v + offs] = ip;
> +                  }
>                 }
>              }
>           }
> @@ -340,8 +360,8 @@ vec4_visitor::var_range_end(unsigned v, unsigned
> n) const
>  bool
>  vec4_visitor::virtual_grf_interferes(int a, int b)
>  {
> -   return !((var_range_end(4 * alloc.offsets[a], 4 * alloc.sizes[a])
> <=
> -             var_range_start(4 * alloc.offsets[b], 4 *
> alloc.sizes[b])) ||
> -            (var_range_end(4 * alloc.offsets[b], 4 * alloc.sizes[b])
> <=
> -             var_range_start(4 * alloc.offsets[a], 4 *
> alloc.sizes[a])));
> +   return !((var_range_end(8 * alloc.offsets[a], 8 * alloc.sizes[a])
> <=
> +             var_range_start(8 * alloc.offsets[b], 8 *
> alloc.sizes[b])) ||
> +            (var_range_end(8 * alloc.offsets[b], 8 * alloc.sizes[b])
> <=
> +             var_range_start(8 * alloc.offsets[a], 8 *
> alloc.sizes[a])));
>  }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
> b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
> index 12d281e..cdbd7dc 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
> @@ -84,8 +84,8 @@ var_from_reg(const simple_allocator &alloc, const
> src_reg &reg,
>  {
>     assert(reg.file == VGRF && reg.nr < alloc.count &&
>            reg.reg_offset < alloc.sizes[reg.nr] && c < 4);
> -   return (4 * (alloc.offsets[reg.nr] + reg.reg_offset) +
> -           BRW_GET_SWZ(reg.swizzle, c));
> +   return (8 * (alloc.offsets[reg.nr] + reg.reg_offset) +
> +           BRW_GET_SWZ(reg.swizzle, c) * MAX2(1, type_sz(reg.type) /
> 4));
>  }
>  
>  inline unsigned
> @@ -94,7 +94,8 @@ var_from_reg(const simple_allocator &alloc, const
> dst_reg &reg,
>  {
>     assert(reg.file == VGRF && reg.nr < alloc.count &&
>            reg.reg_offset < alloc.sizes[reg.nr] && c < 4);
> -   return 4 * (alloc.offsets[reg.nr] + reg.reg_offset) + c;
> +   return 8 * (alloc.offsets[reg.nr] + reg.reg_offset) +
> +          c * MAX2(1, type_sz(reg.type) / 4);
>  }
>  
>  } /* namespace brw */
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to