Re: [Mesa-dev] [PATCH 10/11] i965/fs: Implement support for ir_barrier

2015-04-25 Thread Jordan Justen
On 2015-03-22 19:57:47, Chris Forbes wrote:
> Jordan,
> 
> You also need to set m0.2:15 (Barrier count enable) and m0.2:14-9
> (Barrier count) to have the message gateway actually collect the
> proper number of threads, right?

Looking at the IVB PRM, Vol 4, Part 2, section 1.1.5.2 (Message Payload):

Under M0.2, bit 15, Barrier Count Enable:
"If clear, the Message Gateway increments the Barrier counter and
 marks the Barrier requester thread. There is no immediate response
 from the Gateway. When the counter value equates Barrier Thread
 Count, Gateway will send response back to all the Barrier
 requesters."

I had trouble finding where "Barrier Thread Count" was defined, but it
appears that if the "Barrier Count Enable" bit is not set, then the
number of threads in the CS local group will be used. I'm not sure if
this can also be used in the same way for other (non-compute) stages.

-Jordan

> On Mon, Mar 23, 2015 at 2:49 PM, Jordan Justen
>  wrote:
> > Signed-off-by: Jordan Justen 
> > Reviewed-by: Chris Forbes 
> > ---
> >  src/mesa/drivers/dri/i965/brw_defines.h|  5 +
> >  src/mesa/drivers/dri/i965/brw_fs.h |  3 +++
> >  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 11 +++
> >  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 27 
> > +-
> >  src/mesa/drivers/dri/i965/brw_shader.cpp   |  3 +++
> >  5 files changed, 48 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> > b/src/mesa/drivers/dri/i965/brw_defines.h
> > index 98a392a..9b1fd15 100644
> > --- a/src/mesa/drivers/dri/i965/brw_defines.h
> > +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> > @@ -1102,6 +1102,11 @@ enum opcode {
> >  *   and number of SO primitives needed.
> >  */
> > GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
> > +
> > +   /**
> > +* GLSL barrier()
> > +*/
> > +   SHADER_OPCODE_BARRIER,
> >  };
> >
> >  enum brw_urb_write_flags {
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
> > b/src/mesa/drivers/dri/i965/brw_fs.h
> > index 86a7906..b55c333 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.h
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> > @@ -383,6 +383,8 @@ public:
> > void emit_fb_writes();
> > void emit_urb_writes();
> >
> > +   void emit_barrier();
> > +
> > void emit_shader_time_begin();
> > void emit_shader_time_end();
> > fs_inst *SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg 
> > value);
> > @@ -551,6 +553,7 @@ private:
> >GLuint nr);
> > void generate_fb_write(fs_inst *inst, struct brw_reg payload);
> > void generate_urb_write(fs_inst *inst, struct brw_reg payload);
> > +   void generate_barrier(fs_inst *inst, struct brw_reg src);
> > void generate_blorp_fb_write(fs_inst *inst);
> > void generate_pixel_xy(struct brw_reg dst, bool is_x);
> > void generate_linterp(fs_inst *inst, struct brw_reg dst,
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
> > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > index bd12147..f817e84 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > @@ -369,6 +369,13 @@ fs_generator::generate_urb_write(fs_inst *inst, struct 
> > brw_reg payload)
> >  }
> >
> >  void
> > +fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
> > +{
> > +   brw_barrier(p, src);
> > +   brw_wait(p);
> > +}
> > +
> > +void
> >  fs_generator::generate_blorp_fb_write(fs_inst *inst)
> >  {
> > brw_fb_WRITE(p,
> > @@ -2060,6 +2067,10 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> > dispatch_width)
> > 
> > GEN7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET);
> >   break;
> >
> > +  case SHADER_OPCODE_BARRIER:
> > +generate_barrier(inst, src[0]);
> > +break;
> > +
> >default:
> >  if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
> > _mesa_problem(ctx, "Unsupported opcode `%s' in %s",
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
> > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > index 2b1b72f..5cde8f5 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > @@ -3146,7 +3146,32 @@ fs_visitor::visit(ir_end_primitive *)
> >  void
> >  fs_visitor::visit(ir_barrier *)
> >  {
> > -   assert(!"Not implemented!");
> > +   emit_barrier();
> > +}
> > +
> > +void
> > +fs_visitor::emit_barrier()
> > +{
> > +   assert(brw->gen >= 7);
> > +
> > +   /* We are getting the barrier ID from the compute shader header */
> > +   assert(stage == MESA_SHADER_COMPUTE);
> > +
> > +   fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
> > +
> > +   /* Clear the message payload */
> > +   fs_inst *inst = emit(MOV(payload, fs_reg(0u)));
> > +   inst->force_writemask_all = true;
> > +
> > +   /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.

Re: [Mesa-dev] [PATCH 10/11] i965/fs: Implement support for ir_barrier

2015-03-22 Thread Chris Forbes
Jordan,

You also need to set m0.2:15 (Barrier count enable) and m0.2:14-9
(Barrier count) to have the message gateway actually collect the
proper number of threads, right?

- Chris



On Mon, Mar 23, 2015 at 2:49 PM, Jordan Justen
 wrote:
> Signed-off-by: Jordan Justen 
> Reviewed-by: Chris Forbes 
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h|  5 +
>  src/mesa/drivers/dri/i965/brw_fs.h |  3 +++
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 11 +++
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 27 
> +-
>  src/mesa/drivers/dri/i965/brw_shader.cpp   |  3 +++
>  5 files changed, 48 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index 98a392a..9b1fd15 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1102,6 +1102,11 @@ enum opcode {
>  *   and number of SO primitives needed.
>  */
> GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
> +
> +   /**
> +* GLSL barrier()
> +*/
> +   SHADER_OPCODE_BARRIER,
>  };
>
>  enum brw_urb_write_flags {
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index 86a7906..b55c333 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -383,6 +383,8 @@ public:
> void emit_fb_writes();
> void emit_urb_writes();
>
> +   void emit_barrier();
> +
> void emit_shader_time_begin();
> void emit_shader_time_end();
> fs_inst *SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value);
> @@ -551,6 +553,7 @@ private:
>GLuint nr);
> void generate_fb_write(fs_inst *inst, struct brw_reg payload);
> void generate_urb_write(fs_inst *inst, struct brw_reg payload);
> +   void generate_barrier(fs_inst *inst, struct brw_reg src);
> void generate_blorp_fb_write(fs_inst *inst);
> void generate_pixel_xy(struct brw_reg dst, bool is_x);
> void generate_linterp(fs_inst *inst, struct brw_reg dst,
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index bd12147..f817e84 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -369,6 +369,13 @@ fs_generator::generate_urb_write(fs_inst *inst, struct 
> brw_reg payload)
>  }
>
>  void
> +fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
> +{
> +   brw_barrier(p, src);
> +   brw_wait(p);
> +}
> +
> +void
>  fs_generator::generate_blorp_fb_write(fs_inst *inst)
>  {
> brw_fb_WRITE(p,
> @@ -2060,6 +2067,10 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> dispatch_width)
> 
> GEN7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET);
>   break;
>
> +  case SHADER_OPCODE_BARRIER:
> +generate_barrier(inst, src[0]);
> +break;
> +
>default:
>  if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
> _mesa_problem(ctx, "Unsupported opcode `%s' in %s",
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 2b1b72f..5cde8f5 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -3146,7 +3146,32 @@ fs_visitor::visit(ir_end_primitive *)
>  void
>  fs_visitor::visit(ir_barrier *)
>  {
> -   assert(!"Not implemented!");
> +   emit_barrier();
> +}
> +
> +void
> +fs_visitor::emit_barrier()
> +{
> +   assert(brw->gen >= 7);
> +
> +   /* We are getting the barrier ID from the compute shader header */
> +   assert(stage == MESA_SHADER_COMPUTE);
> +
> +   fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
> +
> +   /* Clear the message payload */
> +   fs_inst *inst = emit(MOV(payload, fs_reg(0u)));
> +   inst->force_writemask_all = true;
> +
> +   /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */
> +   struct fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), 
> BRW_REGISTER_TYPE_UD));
> +   inst = emit(AND(component(payload, 2), r0_2, fs_reg(0x0f000000u)));
> +   inst->force_writemask_all = true;
> +
> +   /* Emit a gateway "barrier" message using the payload we set up, followed
> +* by a wait instruction.
> +*/
> +   emit(SHADER_OPCODE_BARRIER, reg_undef, payload);
>  }
>
>  void
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
> b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 51c965c..d0a7c2a 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -572,6 +572,8 @@ brw_instruction_name(enum opcode op)
>return "gs_svb_set_dst_index";
> case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
>return "gs_ff_sync_set_primitives";
> +   case SHADER_OPCODE_BARRIER:
> +  return "barrier";
> }
>
> unreachable("not reached");
> @@ -986

Re: [Mesa-dev] [PATCH 10/11] i965/fs: Implement support for ir_barrier

2015-03-22 Thread Matt Turner
On Sun, Mar 22, 2015 at 6:49 PM, Jordan Justen
 wrote:
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 2b1b72f..5cde8f5 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -3146,7 +3146,32 @@ fs_visitor::visit(ir_end_primitive *)
>  void
>  fs_visitor::visit(ir_barrier *)
>  {
> -   assert(!"Not implemented!");
> +   emit_barrier();
> +}
> +
> +void
> +fs_visitor::emit_barrier()
> +{
> +   assert(brw->gen >= 7);
> +
> +   /* We are getting the barrier ID from the compute shader header */
> +   assert(stage == MESA_SHADER_COMPUTE);
> +
> +   fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
> +
> +   /* Clear the message payload */
> +   fs_inst *inst = emit(MOV(payload, fs_reg(0u)));
> +   inst->force_writemask_all = true;
> +
> +   /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */
> +   struct fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), 
> BRW_REGISTER_TYPE_UD));

I was going to say that you needed to line wrap this, but you can just
remove 'struct' to reduce the line length.

> +   inst = emit(AND(component(payload, 2), r0_2, fs_reg(0x0f000000u)));
> +   inst->force_writemask_all = true;
> +
> +   /* Emit a gateway "barrier" message using the payload we set up, followed
> +* by a wait instruction.
> +*/
> +   emit(SHADER_OPCODE_BARRIER, reg_undef, payload);
>  }
>
>  void
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev