On 23 December 2011 13:02, Kenneth Graunke <kenn...@whitecape.org> wrote:
> On 12/23/2011 09:37 AM, Paul Berry wrote: > > When rendering triangle strips, vertices come down the pipeline in the > > order specified, even though this causes alternate triangles to have > > reversed winding order. For example, if the vertices are ABCDE, then > > the GS is invoked on triangles ABC, BCD, and CDE, even though this > > means that triangle BCD is in the reverse of the normal winding order. > > The hardware automatically flags the triangles with reversed winding > > order as _3DPRIM_TRISTRIP_REVERSE, so that face culling and two-sided > > coloring can be adjusted to account for the reversed order. > > > > In order to ensure that winding order is correct when streaming > > vertices out to a transform feedback buffer, we need to alter the > > ordering of BCD to BDC when the first provoking vertex convention is > > in use, and to CBD when the last provoking vertex convention is in > > use. > > > > To do this, we precompute an array of indices indicating where each > > vertex will be placed in the transform feedback buffer; normally this > > is SVBI[0] + (0, 1, 2), indicating that vertex order should be > > preserved. When the primitive type is _3DPRIM_TRISTRIP_REVERSE, we > > change this order to either SVBI[0] + (0, 2, 1) or SVBI[0] + (1, 0, > > 2), depending on the provoking vertex convention. > > > > Fixes piglit tests "EXT_transform_feedback/tessellation > > triangle_strip" on Gen6. > > It's really too bad we can't use the 3DSTATE_GS "Reorder Enable" bit. > It's supposed to take care of this, but it looks like it only does PV > first mode on Sandybridge. Turning it on makes triangle_strip wireframe > fail, and nothing else pass. > Yeah, I concur. That bit is *so* close to what we need, but not quite close enough. > > I had a bit of trouble following this patch, but it looks okay to me. > Ok. I won't push it until this evening, so let me know if you can think of a way to improve the readability (I'm at a loss for how to improve it). > > Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> > > > --- > > src/mesa/drivers/dri/i965/brw_gs.h | 6 ++ > > src/mesa/drivers/dri/i965/brw_gs_emit.c | 84 > ++++++++++++++++++++++++------- > > 2 files changed, 72 insertions(+), 18 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_gs.h > b/src/mesa/drivers/dri/i965/brw_gs.h > > index 7bf2248..2ab8b72 100644 > > --- a/src/mesa/drivers/dri/i965/brw_gs.h > > +++ b/src/mesa/drivers/dri/i965/brw_gs.h > > @@ -83,6 +83,12 @@ struct brw_gs_compile { > > struct brw_reg vertex[MAX_GS_VERTS]; > > struct brw_reg header; > > struct brw_reg temp; > > + > > + /** > > + * Register holding destination indices for streamed buffer > writes. > > + * Only used for SOL programs. > > + */ > > + struct brw_reg destination_indices; > > } reg; > > > > /* Number of registers used to store vertex data */ > > diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c > b/src/mesa/drivers/dri/i965/brw_gs_emit.c > > index 1f96a16..607ee75 100644 > > --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c > > +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c > > @@ -45,13 +45,16 @@ > > /** > > * Allocate registers for GS. > > * > > - * If svbi_payload_enable is true, then the thread will be spawned with > the > > - * "SVBI Payload Enable" bit set, so GRF 1 needs to be set aside to > hold the > > - * streamed vertex buffer indices. > > + * If sol_program is true, then: > > + * > > + * - The thread will be spawned with the "SVBI Payload Enable" bit set, > so GRF > > + * 1 needs to be set aside to hold the streamed vertex buffer indices. > > + * > > + * - The thread will need to use the destination_indices register. > > */ > > static void brw_gs_alloc_regs( struct brw_gs_compile *c, > > GLuint nr_verts, > > - bool svbi_payload_enable ) > > + bool sol_program ) > > { > > GLuint i = 0,j; > > > > @@ -60,7 +63,7 @@ static void brw_gs_alloc_regs( struct brw_gs_compile > *c, > > c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; > > > > /* Streamed vertex buffer indices */ > > - if (svbi_payload_enable) > > + if (sol_program) > > c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); > > > > /* Payload vertices plus space for more generated vertices: > > @@ -73,6 +76,11 @@ static void brw_gs_alloc_regs( struct brw_gs_compile > *c, > > c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); > > c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); > > > > + if (sol_program) { > > + c->reg.destination_indices = > > + retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD); > > + } > > + > > c->prog_data.urb_read_length = c->nr_regs; > > c->prog_data.total_grf = i; > > } > > @@ -351,16 +359,17 @@ gen6_sol_program(struct brw_gs_compile *c, struct > brw_gs_prog_key *key, > > > > if (key->num_transform_feedback_bindings > 0) { > > unsigned vertex, binding; > > + struct brw_reg destination_indices_uw = > > + vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW)); > > + > > /* Note: since we use the binding table to keep track of buffer > offsets > > * and stride, the GS doesn't need to keep track of a separate > pointer > > * into each buffer; it uses a single pointer which increments by > 1 for > > * each vertex. So we use SVBI0 for this pointer, regardless of > whether > > * transform feedback is in interleaved or separate attribs mode. > > + * > > + * Make sure that the buffers have enough room for all the > vertices. > > */ > > - brw_MOV(p, get_element_ud(c->reg.header, 5), > > - get_element_ud(c->reg.SVBI, 0)); > > - > > - /* Make sure that the buffers have enough room for all the > vertices. */ > > brw_ADD(p, get_element_ud(c->reg.temp, 0), > > get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts)); > > brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, > > @@ -368,10 +377,58 @@ gen6_sol_program(struct brw_gs_compile *c, struct > brw_gs_prog_key *key, > > get_element_ud(c->reg.SVBI, 4)); > > brw_IF(p, BRW_EXECUTE_1); > > > > + /* Compute the destination indices to write to. Usually we use > SVBI[0] > > + * + (0, 1, 2). However, for odd-numbered triangles in > tristrips, the > > + * vertices come down the pipeline in reversed winding order, so > we need > > + * to flip the order when writing to the transform feedback > buffer. To > > + * ensure that flatshading accuracy is preserved, we need to > write them > > + * in order SVBI[0] + (0, 2, 1) if we're using the first provoking > > + * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're > using > > + * the last provoking vertex convention. > > + * > > + * Note: since brw_imm_v can only be used in instructions in > > + * packed-word execution mode, and SVBI is a double-word, we need > to > > + * first move the appropriate immediate constant ((0, 1, 2), (0, > 2, 1), > > + * or (1, 0, 2)) to the destination_indices register, and then > add SVBI > > + * using a separate instruction. Also, since the immediate > constant is > > + * expressed as packed words, and we need to load double-words > into > > + * destination_indices, we need to intersperse zeros to fill the > upper > > + * halves of each double-word. > > + */ > > + brw_MOV(p, destination_indices_uw, > > + brw_imm_v(0x00020100)); /* (0, 1, 2) */ > > + if (num_verts == 3) { > > + /* Get primitive type into temp register. */ > > + brw_AND(p, get_element_ud(c->reg.temp, 0), > > + get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f)); > > + > > + /* Test if primitive type is TRISTRIP_REVERSE. We need to do > this as > > + * an 8-wide comparison so that the conditional MOV that > follows > > + * moves all 8 words correctly. > > + */ > > + brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ, > > + get_element_ud(c->reg.temp, 0), > > + brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); > > + > > + /* If so, then overwrite destination_indices_uw with the > appropriate > > + * reordering. > > + */ > > + brw_MOV(p, destination_indices_uw, > > + brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */ > > + : 0x00020001)); /* (1, 0, 2) */ > > + brw_set_predicate_control(p, BRW_PREDICATE_NONE); > > + } > > + brw_ADD(p, c->reg.destination_indices, > > + c->reg.destination_indices, get_element_ud(c->reg.SVBI, > 0)); > > + > > /* For each vertex, generate code to output each varying using the > > * appropriate binding table entry. > > */ > > for (vertex = 0; vertex < num_verts; ++vertex) { > > + /* Set up the correct destination index for this vertex */ > > + brw_MOV(p, get_element_ud(c->reg.header, 5), > > + get_element_ud(c->reg.destination_indices, vertex)); > > + > > for (binding = 0; binding < > key->num_transform_feedback_bindings; > > ++binding) { > > unsigned char vert_result = > > @@ -398,15 +455,6 @@ gen6_sol_program(struct brw_gs_compile *c, struct > brw_gs_prog_key *key, > > SURF_INDEX_SOL_BINDING(binding), /* > binding_table_index */ > > final_write); /* send_commit_msg */ > > } > > - > > - /* If there are more vertices to output, increment the pointer > so > > - * that we will start outputting to the next location in the > > - * transform feedback buffers. > > - */ > > - if (vertex != num_verts - 1) { > > - brw_ADD(p, get_element_ud(c->reg.header, 5), > > - get_element_ud(c->reg.header, 5), brw_imm_ud(1)); > > - } > > } > > brw_ENDIF(p); > > > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev