On 9 August 2013 17:14, Chris Forbes <chr...@ijw.co.nz> wrote:

> Splits the bottom 8 bits of f0.0 for further wrangling
> in a SIMD4x2 program. The 4 bits corresponding to the channels in each
> program flow are copied to the LSBs of dst.x visible to each flow.
>
> This is useful for working with clipping flags in the VS.
>
> Signed-off-by: Chris Forbes <chr...@ijw.co.nz>
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h     |  1 +
>  src/mesa/drivers/dri/i965/brw_shader.cpp    |  2 ++
>  src/mesa/drivers/dri/i965/brw_vec4.h        |  2 ++
>  src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 23 +++++++++++++++++++++++
>  4 files changed, 28 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index d8b3b17..2ab0a2b 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -798,6 +798,7 @@ enum opcode {
>     VS_OPCODE_SCRATCH_WRITE,
>     VS_OPCODE_PULL_CONSTANT_LOAD,
>     VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
> +   VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
>  };
>
>  #define BRW_PREDICATE_NONE             0
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp
> b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 9a2e8be..6c7e827 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -494,6 +494,8 @@ brw_instruction_name(enum opcode op)
>        return "pull_constant_load";
>     case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
>        return "pull_constant_load_gen7";
> +   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
> +      return "unpack_flags_simd4x2";
>
>     default:
>        /* Yes, this leaks.  It's in debug code, it should never occur, and
> if
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h
> b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 18e0d56..10836cc 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -587,6 +587,8 @@ private:
>                                           struct brw_reg dst,
>                                           struct brw_reg surf_index,
>                                           struct brw_reg offset);
> +   void generate_unpack_flags(vec4_instruction *inst,
> +                              struct brw_reg dst);
>
>     struct brw_context *brw;
>     struct gl_context *ctx;
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> index c82af0e..a81f045 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> @@ -440,6 +440,25 @@
> vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
>  }
>
>  void
> +vec4_generator::generate_unpack_flags(vec4_instruction *inst,
> +                                      struct brw_reg dst)
> +{
>
+   brw_push_insn_state(p);
> +   brw_set_mask_control(p, BRW_MASK_DISABLE);
> +   brw_set_access_mode(p, BRW_ALIGN_1);
> +
> +   struct brw_reg flags = brw_flag_reg(0, 0);
> +   struct brw_reg dst_0 = suboffset(vec1(dst), 0);
> +   struct brw_reg dst_4 = suboffset(vec1(dst), 4);
> +
> +   brw_AND(p, dst_0, flags, brw_imm_uw(0x0f));
> +   brw_AND(p, dst_4, flags, brw_imm_uw(0xf0));
> +   brw_SHR(p, dst_4, dst_4, brw_imm_uw(4));
>

Shouldn't these 3 immediates use brw_imm_ud?  I see from patch 5 that
you're using a destination register of type UD, IIRC only MOV instructions
can convert from one data type to another.


> +
> +   brw_pop_insn_state(p);
> +}
>

There's a small problem with this new opcode:
vec4_instruction_scheduler::calculate_deps() has no idea that it reads from
the flags register, so there's a danger that during instruction scheduling,
it will get moved prior to the CMP instruction that sets the flags.

Currently calulate_deps() inspects inst->predicate to determine whether the
instruction relies on the value of the flags register.  I'd recommend doing
something like:

- Create a new inline function "bool vec4_instruction::depends_on_flags()
const" which returns true if inst->predicate is set OR the opcode is
VS_OPCODE_UNPACK_FLAGS_SIMD4x2.

- Replace each reference to inst->predicate in
vec4_instruction::calculate_deps() with a call to this new inline function.


I'd be happy if you want to do that as a follow-up fix, so with the
brw_imm_ud issue fixed, consider this patch:

Reviewed-by: Paul Berry <stereotype...@gmail.com>


> +
> +void
>  vec4_generator::generate_scratch_read(vec4_instruction *inst,
>                                        struct brw_reg dst,
>                                        struct brw_reg index)
> @@ -851,6 +870,10 @@
> vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
>        brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
>        break;
>
> +   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
> +      generate_unpack_flags(inst, dst);
> +      break;
> +
>     default:
>        if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
>           _mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",
> --
> 1.8.3.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to