Re: [Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.

2014-09-18 Thread Jordan Justen
On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote:
 From: Samuel Iglesias Gonsalvez sigles...@igalia.com

 We will use this parameter in later patches to provide information relevant
 to transform feedback that needs to be set as part of the FF_SYNC message.

 Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com
 ---
  src/mesa/drivers/dri/i965/brw_defines.h  |  4 
  src/mesa/drivers/dri/i965/brw_vec4.h |  3 ++-
  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +---
  src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp|  3 ++-
  4 files changed, 21 insertions(+), 5 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
 b/src/mesa/drivers/dri/i965/brw_defines.h
 index 6e8b998..b0d6d9f 100644
 --- a/src/mesa/drivers/dri/i965/brw_defines.h
 +++ b/src/mesa/drivers/dri/i965/brw_defines.h
 @@ -1030,6 +1030,10 @@ enum opcode {
  *   FF_SYNC operation.
  *
  * - src1 is the number of primitives written.
 +*
 +* - src2 is the value to hold in M0.0: number of SO vertices to write
 +*   and number of SO primitives needed. Its value will be overwritten
 +*   with the SVBI values if transform feedback is enabled.
  */
 GS_OPCODE_FF_SYNC,

 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
 b/src/mesa/drivers/dri/i965/brw_vec4.h
 index 763cb23..58a5aac 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4.h
 +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
 @@ -679,7 +679,8 @@ private:
 struct brw_reg src2);
 void generate_gs_ff_sync(struct brw_reg dst,
  struct brw_reg src0,
 -struct brw_reg src1);
 +struct brw_reg src1,
 +struct brw_reg src2);
 void generate_gs_set_primitive_id(struct brw_reg dst);
 void generate_oword_dual_block_offsets(struct brw_reg m1,
   struct brw_reg index);
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 index d4554f5..c69b305 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 @@ -734,7 +734,8 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct 
 brw_reg dst,
  void
  vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
  struct brw_reg src0,
 -struct brw_reg src1)
 +struct brw_reg src1,
 +struct brw_reg src2)
  {
 /* We use dst to setup the ff_sync header, so we expect it to be
  * initialized to R0 by the caller. Here we overwrite dword 0 (cleared
 @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
 brw_push_insn_state(p);
 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 brw_set_default_access_mode(p, BRW_ALIGN_1);
 -   brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0));
 +   brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0));
 brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0));
 brw_set_default_access_mode(p, BRW_ALIGN_16);
 brw_pop_insn_state(p);
 @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
 brw_set_default_access_mode(p, BRW_ALIGN_1);
 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0));
 +
 +   /* src2 is not an immediate when we use transform feedback */
 +   if (src2.file != BRW_IMMEDIATE_VALUE) {
 +  brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1));
 +  brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2));
 +  brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3));
 +  brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4));

Ken and I discussed this a bit. Ken suggested that this:
brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1));

Should be able to copy all 4 dwords in one instruction. What do you think?

By the way, this was for the version of this patch on the
gs-support-snb-for-submission-02092014 which has src1 as the
destination and dst as the source for the moves. (Hmm, not sure about
the src1 naming in this context...)

If that change seem good, then
Reviewed-by: Jordan Justen jordan.l.jus...@intel.com

 +   }
 +
 brw_set_default_access_mode(p, BRW_ALIGN_16);
 brw_pop_insn_state(p);
  }
 @@ -1374,7 +1384,7 @@ 
 vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
break;

 case GS_OPCODE_FF_SYNC:
 -  generate_gs_ff_sync(dst, src[0], src[1]);
 +  generate_gs_ff_sync(dst, src[0], src[1], src[2]);
break;

 case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
 diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp 
 b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 index 

Re: [Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.

2014-09-18 Thread Iago Toral Quiroga
On jue, 2014-09-18 at 00:48 -0700, Jordan Justen wrote:
 On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote:
  From: Samuel Iglesias Gonsalvez sigles...@igalia.com
 
  We will use this parameter in later patches to provide information relevant
  to transform feedback that needs to be set as part of the FF_SYNC message.
 
  Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com
  ---
   src/mesa/drivers/dri/i965/brw_defines.h  |  4 
   src/mesa/drivers/dri/i965/brw_vec4.h |  3 ++-
   src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +---
   src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp|  3 ++-
   4 files changed, 21 insertions(+), 5 deletions(-)
 
  diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
  b/src/mesa/drivers/dri/i965/brw_defines.h
  index 6e8b998..b0d6d9f 100644
  --- a/src/mesa/drivers/dri/i965/brw_defines.h
  +++ b/src/mesa/drivers/dri/i965/brw_defines.h
  @@ -1030,6 +1030,10 @@ enum opcode {
   *   FF_SYNC operation.
   *
   * - src1 is the number of primitives written.
  +*
  +* - src2 is the value to hold in M0.0: number of SO vertices to write
  +*   and number of SO primitives needed. Its value will be overwritten
  +*   with the SVBI values if transform feedback is enabled.
   */
  GS_OPCODE_FF_SYNC,
 
  diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
  b/src/mesa/drivers/dri/i965/brw_vec4.h
  index 763cb23..58a5aac 100644
  --- a/src/mesa/drivers/dri/i965/brw_vec4.h
  +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
  @@ -679,7 +679,8 @@ private:
  struct brw_reg src2);
  void generate_gs_ff_sync(struct brw_reg dst,
   struct brw_reg src0,
  -struct brw_reg src1);
  +struct brw_reg src1,
  +struct brw_reg src2);
  void generate_gs_set_primitive_id(struct brw_reg dst);
  void generate_oword_dual_block_offsets(struct brw_reg m1,
struct brw_reg index);
  diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
  b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
  index d4554f5..c69b305 100644
  --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
  +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
  @@ -734,7 +734,8 @@ 
  vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst,
   void
   vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
   struct brw_reg src0,
  -struct brw_reg src1)
  +struct brw_reg src1,
  +struct brw_reg src2)
   {
  /* We use dst to setup the ff_sync header, so we expect it to be
   * initialized to R0 by the caller. Here we overwrite dword 0 (cleared
  @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
  brw_push_insn_state(p);
  brw_set_default_mask_control(p, BRW_MASK_DISABLE);
  brw_set_default_access_mode(p, BRW_ALIGN_1);
  -   brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0));
  +   brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0));
  brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0));
  brw_set_default_access_mode(p, BRW_ALIGN_16);
  brw_pop_insn_state(p);
  @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
  brw_set_default_access_mode(p, BRW_ALIGN_1);
  brw_set_default_mask_control(p, BRW_MASK_DISABLE);
  brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0));
  +
  +   /* src2 is not an immediate when we use transform feedback */
  +   if (src2.file != BRW_IMMEDIATE_VALUE) {
  +  brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1));
  +  brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2));
  +  brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3));
  +  brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4));
 
 Ken and I discussed this a bit. Ken suggested that this:
 brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1));
 
 Should be able to copy all 4 dwords in one instruction. What do you think?

Sure, if we can do this in just on MOV that is better. I'll give it a
try.

 By the way, this was for the version of this patch on the
 gs-support-snb-for-submission-02092014 which has src1 as the
 destination and dst as the source for the moves. (Hmm, not sure about
 the src1 naming in this context...)

Yes, this is used as both a src and a dst... and I supposed Samuel
decided to follow naming conventions for other opcodes that have a dst
and multiple src parameters. I suppose the best way to do this would
have been to create a separate generator opcode for the part where this
is used as a destination register only...

Iago

 If that change seem good, then
 Reviewed-by: Jordan Justen 

Re: [Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.

2014-09-18 Thread Samuel Iglesias Gonsálvez
On Thu, 2014-09-18 at 10:39 +0200, Iago Toral Quiroga wrote:
 On jue, 2014-09-18 at 00:48 -0700, Jordan Justen wrote:
  On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com 
  wrote:
   From: Samuel Iglesias Gonsalvez sigles...@igalia.com
  
   We will use this parameter in later patches to provide information 
   relevant
   to transform feedback that needs to be set as part of the FF_SYNC message.
  
   Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com
   ---
src/mesa/drivers/dri/i965/brw_defines.h  |  4 
src/mesa/drivers/dri/i965/brw_vec4.h |  3 ++-
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +---
src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp|  3 ++-
4 files changed, 21 insertions(+), 5 deletions(-)
  
   diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
   b/src/mesa/drivers/dri/i965/brw_defines.h
   index 6e8b998..b0d6d9f 100644
   --- a/src/mesa/drivers/dri/i965/brw_defines.h
   +++ b/src/mesa/drivers/dri/i965/brw_defines.h
   @@ -1030,6 +1030,10 @@ enum opcode {
*   FF_SYNC operation.
*
* - src1 is the number of primitives written.
   +*
   +* - src2 is the value to hold in M0.0: number of SO vertices to write
   +*   and number of SO primitives needed. Its value will be overwritten
   +*   with the SVBI values if transform feedback is enabled.
*/
   GS_OPCODE_FF_SYNC,
  
   diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
   b/src/mesa/drivers/dri/i965/brw_vec4.h
   index 763cb23..58a5aac 100644
   --- a/src/mesa/drivers/dri/i965/brw_vec4.h
   +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
   @@ -679,7 +679,8 @@ private:
   struct brw_reg src2);
   void generate_gs_ff_sync(struct brw_reg dst,
struct brw_reg src0,
   -struct brw_reg src1);
   +struct brw_reg src1,
   +struct brw_reg src2);
   void generate_gs_set_primitive_id(struct brw_reg dst);
   void generate_oword_dual_block_offsets(struct brw_reg m1,
 struct brw_reg index);
   diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
   b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
   index d4554f5..c69b305 100644
   --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
   +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
   @@ -734,7 +734,8 @@ 
   vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst,
void
vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
struct brw_reg src0,
   -struct brw_reg src1)
   +struct brw_reg src1,
   +struct brw_reg src2)
{
   /* We use dst to setup the ff_sync header, so we expect it to be
* initialized to R0 by the caller. Here we overwrite dword 0 (cleared
   @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg 
   dst,
   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   -   brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0));
   +   brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0));
   brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0));
   brw_set_default_access_mode(p, BRW_ALIGN_16);
   brw_pop_insn_state(p);
   @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg 
   dst,
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0));
   +
   +   /* src2 is not an immediate when we use transform feedback */
   +   if (src2.file != BRW_IMMEDIATE_VALUE) {
   +  brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1));
   +  brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2));
   +  brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3));
   +  brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4));
  
  Ken and I discussed this a bit. Ken suggested that this:
  brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1));
  
  Should be able to copy all 4 dwords in one instruction. What do you think?
 
 Sure, if we can do this in just on MOV that is better. I'll give it a
 try.
 

Piglit shows no regressions with this change, it works like a charm!

We will add it to the commit.

Thanks,

Sam


signature.asc
Description: This is a digitally signed message part
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.

2014-08-14 Thread Iago Toral Quiroga
From: Samuel Iglesias Gonsalvez sigles...@igalia.com

We will use this parameter in later patches to provide information relevant
to transform feedback that needs to be set as part of the FF_SYNC message.

Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com
---
 src/mesa/drivers/dri/i965/brw_defines.h  |  4 
 src/mesa/drivers/dri/i965/brw_vec4.h |  3 ++-
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +---
 src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp|  3 ++-
 4 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 6e8b998..b0d6d9f 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1030,6 +1030,10 @@ enum opcode {
 *   FF_SYNC operation.
 *
 * - src1 is the number of primitives written.
+*
+* - src2 is the value to hold in M0.0: number of SO vertices to write
+*   and number of SO primitives needed. Its value will be overwritten
+*   with the SVBI values if transform feedback is enabled.
 */
GS_OPCODE_FF_SYNC,
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 763cb23..58a5aac 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -679,7 +679,8 @@ private:
struct brw_reg src2);
void generate_gs_ff_sync(struct brw_reg dst,
 struct brw_reg src0,
-struct brw_reg src1);
+struct brw_reg src1,
+struct brw_reg src2);
void generate_gs_set_primitive_id(struct brw_reg dst);
void generate_oword_dual_block_offsets(struct brw_reg m1,
  struct brw_reg index);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index d4554f5..c69b305 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -734,7 +734,8 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct 
brw_reg dst,
 void
 vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
 struct brw_reg src0,
-struct brw_reg src1)
+struct brw_reg src1,
+struct brw_reg src2)
 {
/* We use dst to setup the ff_sync header, so we expect it to be
 * initialized to R0 by the caller. Here we overwrite dword 0 (cleared
@@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);
-   brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0));
+   brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0));
brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0));
brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
@@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0));
+
+   /* src2 is not an immediate when we use transform feedback */
+   if (src2.file != BRW_IMMEDIATE_VALUE) {
+  brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1));
+  brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2));
+  brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3));
+  brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4));
+   }
+
brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
 }
@@ -1374,7 +1384,7 @@ 
vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
   break;
 
case GS_OPCODE_FF_SYNC:
-  generate_gs_ff_sync(dst, src[0], src[1]);
+  generate_gs_ff_sync(dst, src[0], src[1], src[2]);
   break;
 
case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp 
b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index b45c381..c1cfe75 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -331,7 +331,8 @@ gen6_gs_visitor::emit_thread_end()
{
   this-current_annotation = gen6 thread end: ff_sync;
   emit(GS_OPCODE_FF_SYNC,
-   dst_reg(MRF, base_mrf), this-temp, this-prim_count);
+   dst_reg(MRF, base_mrf), this-temp, this-prim_count,
+   brw_imm_ud(0u));
 
   /* Loop over all buffered vertices and emit URB write messages */
   this-current_annotation = gen6 thread end: urb writes init;
-- 
1.9.1

___
mesa-dev mailing list