On 9 August 2013 17:14, Chris Forbes <chr...@ijw.co.nz> wrote: > Splits the bottom 8 bits of f0.0 for further wrangling > in a SIMD4x2 program. The 4 bits corresponding to the channels in each > program flow are copied to the LSBs of dst.x visible to each flow. > > This is useful for working with clipping flags in the VS. > > Signed-off-by: Chris Forbes <chr...@ijw.co.nz> > --- > src/mesa/drivers/dri/i965/brw_defines.h | 1 + > src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ > src/mesa/drivers/dri/i965/brw_vec4.h | 2 ++ > src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 23 +++++++++++++++++++++++ > 4 files changed, 28 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index d8b3b17..2ab0a2b 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -798,6 +798,7 @@ enum opcode { > VS_OPCODE_SCRATCH_WRITE, > VS_OPCODE_PULL_CONSTANT_LOAD, > VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, > + VS_OPCODE_UNPACK_FLAGS_SIMD4X2, > }; > > #define BRW_PREDICATE_NONE 0 > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index 9a2e8be..6c7e827 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -494,6 +494,8 @@ brw_instruction_name(enum opcode op) > return "pull_constant_load"; > case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: > return "pull_constant_load_gen7"; > + case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: > + return "unpack_flags_simd4x2"; > > default: > /* Yes, this leaks. It's in debug code, it should never occur, and > if > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h > b/src/mesa/drivers/dri/i965/brw_vec4.h > index 18e0d56..10836cc 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.h > +++ b/src/mesa/drivers/dri/i965/brw_vec4.h > @@ -587,6 +587,8 @@ private: > struct brw_reg dst, > struct brw_reg surf_index, > struct brw_reg offset); > + void generate_unpack_flags(vec4_instruction *inst, > + struct brw_reg dst); > > struct brw_context *brw; > struct gl_context *ctx; > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp > index c82af0e..a81f045 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp > @@ -440,6 +440,25 @@ > vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1, > } > > void > +vec4_generator::generate_unpack_flags(vec4_instruction *inst, > + struct brw_reg dst) > +{ > + brw_push_insn_state(p); > + brw_set_mask_control(p, BRW_MASK_DISABLE); > + brw_set_access_mode(p, BRW_ALIGN_1); > + > + struct brw_reg flags = brw_flag_reg(0, 0); > + struct brw_reg dst_0 = suboffset(vec1(dst), 0); > + struct brw_reg dst_4 = suboffset(vec1(dst), 4); > + > + brw_AND(p, dst_0, flags, brw_imm_uw(0x0f)); > + brw_AND(p, dst_4, flags, brw_imm_uw(0xf0)); > + brw_SHR(p, dst_4, dst_4, brw_imm_uw(4)); >
Shouldn't these 3 immediates use brw_imm_ud? I see from patch 5 that you're using a destination register of type UD, IIRC only MOV instructions can convert from one data type to another. > + > + brw_pop_insn_state(p); > +} > There's a small problem with this new opcode: vec4_instruction_scheduler::calculate_deps() has no idea that it reads from the flags register, so there's a danger that during instruction scheduling, it will get moved prior to the CMP instruction that sets the flags. Currently calulate_deps() inspects inst->predicate to determine whether the instruction relies on the value of the flags register. I'd recommend doing something like: - Create a new inline function "bool vec4_instruction::depends_on_flags() const" which returns true if inst->predicate is set OR the opcode is VS_OPCODE_UNPACK_FLAGS_SIMD4x2. - Replace each reference to inst->predicate in vec4_instruction::calculate_deps() with a call to this new inline function. I'd be happy if you want to do that as a follow-up fix, so with the brw_imm_ud issue fixed, consider this patch: Reviewed-by: Paul Berry <stereotype...@gmail.com> > + > +void > vec4_generator::generate_scratch_read(vec4_instruction *inst, > struct brw_reg dst, > struct brw_reg index) > @@ -851,6 +870,10 @@ > vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, > brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME); > break; > > + case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: > + generate_unpack_flags(inst, dst); > + break; > + > default: > if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { > _mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n", > -- > 1.8.3.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev