Iago Toral Quiroga <ito...@igalia.com> writes:

> So that it has the same semantics as the scalar backend implementation. The
> helper will now take a simd width (which is always 8 in vec4 mode) and step
> as many scalar components as specified by that width, respecting the size of
> the scalar channels.
> ---
>  src/mesa/drivers/dri/i965/brw_ir_vec4.h                | 12 ++++++++----
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp             |  2 +-
>  src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp | 14 +++++++-------
>  3 files changed, 16 insertions(+), 12 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h 
> b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> index ef79e33..3fa64a8 100644
> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> @@ -100,11 +100,13 @@ byte_offset(src_reg reg, unsigned bytes)
>  }
>  
>  static inline src_reg
> -offset(src_reg reg, unsigned delta)
> +offset(src_reg reg, unsigned width, unsigned delta)
>  {
>     assert(delta == 0 ||
>            (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));

You can drop this assertion now since byte_offset() is checking for the
same thing.

> -   reg.offset += delta * (reg.file == UNIFORM ? 16 : REG_SIZE);
> +   unsigned bytes =
> +      (reg.file != UNIFORM ? width : 4) * delta * type_sz(reg.type);
> +   add_byte_offset(&reg, bytes);
>     return reg;
>  }

Now that we have a byte_offset() helper we should probably take
advantage of it here.  It also seemed rather surprising at first glance
that the width argument was ignored for uniforms, but the reason would
be clear if you wrote down the expression for the vertical stride of the
region explicitly, like:

| static inline src_reg
| offset(const src_reg &reg, unsigned width, unsigned delta)
| {
|    const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
|    const unsigned num_components = MAX2(width / 4 * stride, 4);
|    return byte_offset(reg, num_components * type_sz(reg.type) * delta);
| }

Note that this also makes sure that we step by a whole multiple of a
vec4.  With that changed [and the other offset function below] patch is:

Reviewed-by: Francisco Jerez <curroje...@riseup.net>

>  
> @@ -176,11 +178,13 @@ byte_offset(dst_reg reg, unsigned bytes)
>  }
>  
>  static inline dst_reg
> -offset(dst_reg reg, unsigned delta)
> +offset(dst_reg reg, unsigned width, unsigned delta)
>  {
>     assert(delta == 0 ||
>            (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
> -   reg.offset += delta * (reg.file == UNIFORM ? 16 : REG_SIZE);
> +   unsigned bytes =
> +      (reg.file != UNIFORM ? width : 4) * delta * type_sz(reg.type);
> +   add_byte_offset(&reg, bytes);
>     return reg;
>  }
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index ba3bbdf..07e9249 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -256,7 +256,7 @@ dst_reg_for_nir_reg(vec4_visitor *v, nir_register 
> *nir_reg,
>     dst_reg reg;
>  
>     reg = v->nir_locals[nir_reg->index];
> -   reg = offset(reg, base_offset);
> +   reg = offset(reg, 8, base_offset);
>     if (indirect) {
>        reg.reladdr =
>           new(v->mem_ctx) src_reg(v->get_nir_src(*indirect,
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
> index 19c685f..00c94fe 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
> @@ -42,9 +42,9 @@ namespace {
>                                           DIV_ROUND_UP(size * dst_stride, 4));
>  
>              for (unsigned i = 0; i < size; ++i)
> -               bld.MOV(writemask(offset(dst, i * dst_stride / 4),
> +               bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
>                                   1 << (i * dst_stride % 4)),
> -                       swizzle(offset(src, i * src_stride / 4),
> +                       swizzle(offset(src, 8, i * src_stride / 4),
>                                 brw_swizzle_for_mask(1 << (i * src_stride % 
> 4))));
>  
>              return src_reg(dst);
> @@ -124,16 +124,16 @@ namespace brw {
>              unsigned n = 0;
>  
>              if (header_sz)
> -               bld.exec_all().MOV(offset(payload, n++),
> +               bld.exec_all().MOV(offset(payload, 8, n++),
>                                    retype(header, BRW_REGISTER_TYPE_UD));
>  
>              for (unsigned i = 0; i < addr_sz; i++)
> -               bld.MOV(offset(payload, n++),
> -                       offset(retype(addr, BRW_REGISTER_TYPE_UD), i));
> +               bld.MOV(offset(payload, 8, n++),
> +                       offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
>  
>              for (unsigned i = 0; i < src_sz; i++)
> -               bld.MOV(offset(payload, n++),
> -                       offset(retype(src, BRW_REGISTER_TYPE_UD), i));
> +               bld.MOV(offset(payload, 8, n++),
> +                       offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
>  
>              /* Reduce the dynamically uniform surface index to a single
>               * scalar.
> -- 
> 2.7.4

Attachment: signature.asc
Description: PGP signature

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to