Re: [Mesa-dev] [PATCH 09/25] i965/gen7: Fix the untyped surface messages to deal with indirect surface access.

2013-12-31 Thread Paul Berry
On 2 December 2013 11:39, Francisco Jerez  wrote:

> Change brw_untyped_atomic() and brw_untyped_surface_read() to take the
> surface index as a register instead of a constant, construct the
> message descriptor dynamically by OR'ing the surface index and other
> descriptor bits together and use the non-immediate variant of SEND to
> submit the surface message.
> ---
>  src/mesa/drivers/dri/i965/brw_eu.h   |  18 +-
>  src/mesa/drivers/dri/i965/brw_eu_emit.c  | 200
> +++
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp   |   7 +-
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |   7 +-
>  4 files changed, 147 insertions(+), 85 deletions(-)
>

There's something a bit hacky about the way brw_set_message_descriptor()
has always worked, and IMHO this patch makes that hackiness a lot more
troublesome.

The hackiness is: instead of computing the message descriptor we want as an
int32 and then passing it to brw_set_src1() using brw_imm_d(),
brw_set_message_descriptor() would pass brw_imm_d(0) to brw_set_src1(), and
then it would poke the proper message descriptor bits directly into the
brw_instruction::bits3 (which holds the immediate value used by the
instruction).

Previous to this patch, that hackiness was confined to just SEND
instructions.  But with this patch, brw_load_indirect_message_descriptor()
now does the same sort of hack to MOV and OR instructions in order to set
the msg_length, response_length, and header_present fields of the message
descriptor that's being dynamically computed.

I would much rather if we first refactored the code to deal with message
descriptors in a non-hacky way.  That is, instead of setting the immediate
value to 0 and then poking in the message descriptor bits, first compute
the correct message descriptor as an int32_t, and then store it in the SEND
instruction using brw_set_src1().  As part of this refactor, we would move
the message descriptor bitfield definitions out of brw_instruction::bits3
and into their own independent union.

Then, in this patch, instead of having
brw_load_indirect_message_descriptor() poke the constant parts of the
message descriptor into the OR or MOV instruction, it could just use the
new union to set the msg_length, response_length, and header_present fields
of the message descriptor, and then pass the resulting int32 value to
brw_MOV() or brw_OR() via brw_imm_d().  I think the resulting code would be
a lot easier to understand and maintain.

Additional comments below, though I'm not sure if they're all relevant
considering the refactor I'm suggesting above.


>
> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h
> b/src/mesa/drivers/dri/i965/brw_eu.h
> index a6a65ca..45b421b 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu.h
> +++ b/src/mesa/drivers/dri/i965/brw_eu.h
> @@ -360,20 +360,20 @@ void brw_CMP(struct brw_compile *p,
>
>  void
>  brw_untyped_atomic(struct brw_compile *p,
> -   struct brw_reg dest,
> +   struct brw_reg dst,
> struct brw_reg mrf,
> -   GLuint atomic_op,
> -   GLuint bind_table_index,
> -   GLuint msg_length,
> -   GLuint response_length);
> +   struct brw_reg surface,
> +   unsigned atomic_op,
> +   unsigned msg_length,
> +   bool response_expected);
>
>  void
>  brw_untyped_surface_read(struct brw_compile *p,
> - struct brw_reg dest,
> + struct brw_reg dst,
>   struct brw_reg mrf,
> - GLuint bind_table_index,
> - GLuint msg_length,
> - GLuint response_length);
> + struct brw_reg surface,
> + unsigned msg_length,
> + unsigned num_channels);
>
>  /***
>   * brw_eu_util.c:
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> index cc093e0..b94a6d1 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> @@ -2527,23 +2527,87 @@ brw_svb_write(struct brw_compile *p,
>  send_commit_msg); /* send_commit_msg */
>  }
>
> +static struct brw_instruction *
> +brw_load_indirect_message_descriptor(struct brw_compile *p,
> + struct brw_reg dst,
> + struct brw_reg src,
> + unsigned msg_length,
> + unsigned response_length,
> + bool header_present)
> +{
> +   struct brw_instruction *insn;
> +
> +   brw_push_insn_state(p);
> +   brw_set_access_mode(p, BRW_ALIGN_1);
> +   brw_set_mask_control(p, BRW_MASK_DISABLE);
> +   brw_set_predicate_control(p,

[Mesa-dev] [PATCH 09/25] i965/gen7: Fix the untyped surface messages to deal with indirect surface access.

2013-12-02 Thread Francisco Jerez
Change brw_untyped_atomic() and brw_untyped_surface_read() to take the
surface index as a register instead of a constant, construct the
message descriptor dynamically by OR'ing the surface index and other
descriptor bits together and use the non-immediate variant of SEND to
submit the surface message.
---
 src/mesa/drivers/dri/i965/brw_eu.h   |  18 +-
 src/mesa/drivers/dri/i965/brw_eu_emit.c  | 200 +++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   |   7 +-
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |   7 +-
 4 files changed, 147 insertions(+), 85 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
b/src/mesa/drivers/dri/i965/brw_eu.h
index a6a65ca..45b421b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -360,20 +360,20 @@ void brw_CMP(struct brw_compile *p,
 
 void
 brw_untyped_atomic(struct brw_compile *p,
-   struct brw_reg dest,
+   struct brw_reg dst,
struct brw_reg mrf,
-   GLuint atomic_op,
-   GLuint bind_table_index,
-   GLuint msg_length,
-   GLuint response_length);
+   struct brw_reg surface,
+   unsigned atomic_op,
+   unsigned msg_length,
+   bool response_expected);
 
 void
 brw_untyped_surface_read(struct brw_compile *p,
- struct brw_reg dest,
+ struct brw_reg dst,
  struct brw_reg mrf,
- GLuint bind_table_index,
- GLuint msg_length,
- GLuint response_length);
+ struct brw_reg surface,
+ unsigned msg_length,
+ unsigned num_channels);
 
 /*** 
  * brw_eu_util.c:
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index cc093e0..b94a6d1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2527,23 +2527,87 @@ brw_svb_write(struct brw_compile *p,
 send_commit_msg); /* send_commit_msg */
 }
 
+static struct brw_instruction *
+brw_load_indirect_message_descriptor(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ unsigned msg_length,
+ unsigned response_length,
+ bool header_present)
+{
+   struct brw_instruction *insn;
+
+   brw_push_insn_state(p);
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   if (src.file == BRW_IMMEDIATE_VALUE) {
+  insn = brw_MOV(p, dst, brw_imm_ud(src.dw1.ud));
+   } else {
+  struct brw_reg tmp = suboffset(vec1(retype(src, BRW_REGISTER_TYPE_UD)),
+ BRW_GET_SWZ(src.dw1.bits.swizzle, 0));
+  insn = brw_OR(p, dst, tmp, brw_imm_ud(0));
+   }
+
+   insn->bits3.generic_gen5.msg_length = msg_length;
+   insn->bits3.generic_gen5.response_length = response_length;
+   insn->bits3.generic_gen5.header_present = header_present;
+
+   brw_pop_insn_state(p);
+
+   return insn;
+}
+
+static struct brw_instruction *
+brw_send_indirect_message(struct brw_compile *p,
+  unsigned sfid,
+  struct brw_reg dst,
+  struct brw_reg mrf,
+  struct brw_reg desc)
+{
+   /* Due to a hardware limitation the message descriptor desc MUST be
+* stored in a0.0.  That means that there's only room for one
+* descriptor and the surface indices of different channels in the
+* same SIMD thread cannot diverge.  That's OK for the moment
+* because OpenGL requires image (and atomic counter) array
+* indexing to be dynamically uniform.
+*/
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+   brw_set_dest(p, insn, retype(dst, BRW_REGISTER_TYPE_UD));
+   brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
+   brw_set_src1(p, insn, retype(desc, BRW_REGISTER_TYPE_UD));
+
+   /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
+   insn->header.destreg__conditionalmod = sfid;
+
+   return insn;
+}
+
+static unsigned
+brw_surface_payload_size(struct brw_compile *p,
+ unsigned num_channels,
+ bool has_simd4x2,
+ bool has_simd16)
+{
+   if (has_simd4x2 && p->current->header.access_mode == BRW_ALIGN_16)
+  return 1;
+   else if (has_simd16 && p->compressed)
+  return 2 * num_channels;
+   else
+  return num_channels;
+}
+
 static void
 brw_set_dp_untyped_atomic_messa