Re: [Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.
On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote: From: Samuel Iglesias Gonsalvez sigles...@igalia.com We will use this parameter in later patches to provide information relevant to transform feedback that needs to be set as part of the FF_SYNC message. Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com --- src/mesa/drivers/dri/i965/brw_defines.h | 4 src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +--- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp| 3 ++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6e8b998..b0d6d9f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1030,6 +1030,10 @@ enum opcode { * FF_SYNC operation. * * - src1 is the number of primitives written. +* +* - src2 is the value to hold in M0.0: number of SO vertices to write +* and number of SO primitives needed. Its value will be overwritten +* with the SVBI values if transform feedback is enabled. */ GS_OPCODE_FF_SYNC, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 763cb23..58a5aac 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -679,7 +679,8 @@ private: struct brw_reg src2); void generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1); +struct brw_reg src1, +struct brw_reg src2); void generate_gs_set_primitive_id(struct brw_reg dst); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index d4554f5..c69b305 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -734,7 +734,8 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst, void vec4_generator::generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1) +struct brw_reg src1, +struct brw_reg src2) { /* We use dst to setup the ff_sync header, so we expect it to be * initialized to R0 by the caller. Here we overwrite dword 0 (cleared @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0)); + brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0)); brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0)); brw_set_default_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0)); + + /* src2 is not an immediate when we use transform feedback */ + if (src2.file != BRW_IMMEDIATE_VALUE) { + brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1)); + brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2)); + brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3)); + brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4)); Ken and I discussed this a bit. Ken suggested that this: brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1)); Should be able to copy all 4 dwords in one instruction. What do you think? By the way, this was for the version of this patch on the gs-support-snb-for-submission-02092014 which has src1 as the destination and dst as the source for the moves. (Hmm, not sure about the src1 naming in this context...) If that change seem good, then Reviewed-by: Jordan Justen jordan.l.jus...@intel.com + } + brw_set_default_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); } @@ -1374,7 +1384,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, break; case GS_OPCODE_FF_SYNC: - generate_gs_ff_sync(dst, src[0], src[1]); + generate_gs_ff_sync(dst, src[0], src[1], src[2]); break; case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index
Re: [Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.
On jue, 2014-09-18 at 00:48 -0700, Jordan Justen wrote: On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote: From: Samuel Iglesias Gonsalvez sigles...@igalia.com We will use this parameter in later patches to provide information relevant to transform feedback that needs to be set as part of the FF_SYNC message. Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com --- src/mesa/drivers/dri/i965/brw_defines.h | 4 src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +--- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp| 3 ++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6e8b998..b0d6d9f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1030,6 +1030,10 @@ enum opcode { * FF_SYNC operation. * * - src1 is the number of primitives written. +* +* - src2 is the value to hold in M0.0: number of SO vertices to write +* and number of SO primitives needed. Its value will be overwritten +* with the SVBI values if transform feedback is enabled. */ GS_OPCODE_FF_SYNC, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 763cb23..58a5aac 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -679,7 +679,8 @@ private: struct brw_reg src2); void generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1); +struct brw_reg src1, +struct brw_reg src2); void generate_gs_set_primitive_id(struct brw_reg dst); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index d4554f5..c69b305 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -734,7 +734,8 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst, void vec4_generator::generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1) +struct brw_reg src1, +struct brw_reg src2) { /* We use dst to setup the ff_sync header, so we expect it to be * initialized to R0 by the caller. Here we overwrite dword 0 (cleared @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0)); + brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0)); brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0)); brw_set_default_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0)); + + /* src2 is not an immediate when we use transform feedback */ + if (src2.file != BRW_IMMEDIATE_VALUE) { + brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1)); + brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2)); + brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3)); + brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4)); Ken and I discussed this a bit. Ken suggested that this: brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1)); Should be able to copy all 4 dwords in one instruction. What do you think? Sure, if we can do this in just on MOV that is better. I'll give it a try. By the way, this was for the version of this patch on the gs-support-snb-for-submission-02092014 which has src1 as the destination and dst as the source for the moves. (Hmm, not sure about the src1 naming in this context...) Yes, this is used as both a src and a dst... and I supposed Samuel decided to follow naming conventions for other opcodes that have a dst and multiple src parameters. I suppose the best way to do this would have been to create a separate generator opcode for the part where this is used as a destination register only... Iago If that change seem good, then Reviewed-by: Jordan Justen
Re: [Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.
On Thu, 2014-09-18 at 10:39 +0200, Iago Toral Quiroga wrote: On jue, 2014-09-18 at 00:48 -0700, Jordan Justen wrote: On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote: From: Samuel Iglesias Gonsalvez sigles...@igalia.com We will use this parameter in later patches to provide information relevant to transform feedback that needs to be set as part of the FF_SYNC message. Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com --- src/mesa/drivers/dri/i965/brw_defines.h | 4 src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +--- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp| 3 ++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6e8b998..b0d6d9f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1030,6 +1030,10 @@ enum opcode { * FF_SYNC operation. * * - src1 is the number of primitives written. +* +* - src2 is the value to hold in M0.0: number of SO vertices to write +* and number of SO primitives needed. Its value will be overwritten +* with the SVBI values if transform feedback is enabled. */ GS_OPCODE_FF_SYNC, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 763cb23..58a5aac 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -679,7 +679,8 @@ private: struct brw_reg src2); void generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1); +struct brw_reg src1, +struct brw_reg src2); void generate_gs_set_primitive_id(struct brw_reg dst); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index d4554f5..c69b305 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -734,7 +734,8 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst, void vec4_generator::generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1) +struct brw_reg src1, +struct brw_reg src2) { /* We use dst to setup the ff_sync header, so we expect it to be * initialized to R0 by the caller. Here we overwrite dword 0 (cleared @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0)); + brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0)); brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0)); brw_set_default_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0)); + + /* src2 is not an immediate when we use transform feedback */ + if (src2.file != BRW_IMMEDIATE_VALUE) { + brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1)); + brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2)); + brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3)); + brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4)); Ken and I discussed this a bit. Ken suggested that this: brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1)); Should be able to copy all 4 dwords in one instruction. What do you think? Sure, if we can do this in just on MOV that is better. I'll give it a try. Piglit shows no regressions with this change, it works like a charm! We will add it to the commit. Thanks, Sam signature.asc Description: This is a digitally signed message part ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.
From: Samuel Iglesias Gonsalvez sigles...@igalia.com We will use this parameter in later patches to provide information relevant to transform feedback that needs to be set as part of the FF_SYNC message. Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com --- src/mesa/drivers/dri/i965/brw_defines.h | 4 src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +--- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp| 3 ++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6e8b998..b0d6d9f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1030,6 +1030,10 @@ enum opcode { * FF_SYNC operation. * * - src1 is the number of primitives written. +* +* - src2 is the value to hold in M0.0: number of SO vertices to write +* and number of SO primitives needed. Its value will be overwritten +* with the SVBI values if transform feedback is enabled. */ GS_OPCODE_FF_SYNC, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 763cb23..58a5aac 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -679,7 +679,8 @@ private: struct brw_reg src2); void generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1); +struct brw_reg src1, +struct brw_reg src2); void generate_gs_set_primitive_id(struct brw_reg dst); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index d4554f5..c69b305 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -734,7 +734,8 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst, void vec4_generator::generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1) +struct brw_reg src1, +struct brw_reg src2) { /* We use dst to setup the ff_sync header, so we expect it to be * initialized to R0 by the caller. Here we overwrite dword 0 (cleared @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0)); + brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0)); brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0)); brw_set_default_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0)); + + /* src2 is not an immediate when we use transform feedback */ + if (src2.file != BRW_IMMEDIATE_VALUE) { + brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1)); + brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2)); + brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3)); + brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4)); + } + brw_set_default_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); } @@ -1374,7 +1384,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, break; case GS_OPCODE_FF_SYNC: - generate_gs_ff_sync(dst, src[0], src[1]); + generate_gs_ff_sync(dst, src[0], src[1], src[2]); break; case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index b45c381..c1cfe75 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -331,7 +331,8 @@ gen6_gs_visitor::emit_thread_end() { this-current_annotation = gen6 thread end: ff_sync; emit(GS_OPCODE_FF_SYNC, - dst_reg(MRF, base_mrf), this-temp, this-prim_count); + dst_reg(MRF, base_mrf), this-temp, this-prim_count, + brw_imm_ud(0u)); /* Loop over all buffered vertices and emit URB write messages */ this-current_annotation = gen6 thread end: urb writes init; -- 1.9.1 ___ mesa-dev mailing list