On Thu, 2014-09-18 at 16:05 -0700, Jordan Justen wrote: > On Thu, Aug 14, 2014 at 4:12 AM, Iago Toral Quiroga <ito...@igalia.com> wrote: > > From: Samuel Iglesias Gonsalvez <sigles...@igalia.com> > > > > This takes care of generating code required to handle transform feedback. > > Notice that transform feedback isn't enabled yet, since that requires > > additional setups in other parts of the code that will come in later > > patches. > > > > Signed-off-by: Samuel Iglesias Gonsalvez <sigles...@igalia.com> > > --- > > src/mesa/drivers/dri/i965/brw_context.h | 113 ++++++---- > > src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 309 > > +++++++++++++++++++++++++- > > src/mesa/drivers/dri/i965/gen6_gs_visitor.h | 14 ++ > > 3 files changed, 391 insertions(+), 45 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > > b/src/mesa/drivers/dri/i965/brw_context.h > > index 7439da1..3418b76 100644 > > --- a/src/mesa/drivers/dri/i965/brw_context.h > > +++ b/src/mesa/drivers/dri/i965/brw_context.h > > @@ -553,48 +553,6 @@ struct brw_vs_prog_data { > > bool uses_vertexid; > > }; > > > > - > > -/* Note: brw_gs_prog_data_compare() must be updated when adding fields to > > - * this struct! > > - */ > > -struct brw_gs_prog_data > > -{ > > - struct brw_vec4_prog_data base; > > - > > - /** > > - * Size of an output vertex, measured in HWORDS (32 bytes). > > - */ > > - unsigned output_vertex_size_hwords; > > - > > - unsigned output_topology; > > - > > - /** > > - * Size of the control data (cut bits or StreamID bits), in hwords (32 > > - * bytes). 0 if there is no control data. > > - */ > > - unsigned control_data_header_size_hwords; > > - > > - /** > > - * Format of the control data (either > > GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID > > - * if the control data is StreamID bits, or > > - * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut > > bits). > > - * Ignored if control_data_header_size is 0. 
> > - */ > > - unsigned control_data_format; > > - > > - bool include_primitive_id; > > - > > - int invocations; > > - > > - /** > > - * Dispatch mode, can be any of: > > - * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT > > - * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE > > - * GEN7_GS_DISPATCH_MODE_SINGLE > > - */ > > - int dispatch_mode; > > -}; > > - > > /** Number of texture sampler units */ > > #define BRW_MAX_TEX_UNIT 32 > > > > @@ -641,6 +599,77 @@ struct brw_gs_prog_data > > #define SURF_INDEX_GEN6_SOL_BINDING(t) (t) > > #define BRW_MAX_GEN6_GS_SURFACES > > SURF_INDEX_GEN6_SOL_BINDING(BRW_MAX_SOL_BINDINGS) > > > > +/* Note: brw_gs_prog_data_compare() must be updated when adding fields to > > + * this struct! > > + */ > > +struct brw_gs_prog_data > > +{ > > + struct brw_vec4_prog_data base; > > + > > + /** > > + * Size of an output vertex, measured in HWORDS (32 bytes). > > + */ > > + unsigned output_vertex_size_hwords; > > + > > + unsigned output_topology; > > + > > + /** > > + * Size of the control data (cut bits or StreamID bits), in hwords (32 > > + * bytes). 0 if there is no control data. > > + */ > > + unsigned control_data_header_size_hwords; > > + > > + /** > > + * Format of the control data (either > > GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID > > + * if the control data is StreamID bits, or > > + * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut > > bits). > > + * Ignored if control_data_header_size is 0. > > + */ > > + unsigned control_data_format; > > + > > + bool include_primitive_id; > > + > > + int invocations; > > + > > + /** > > + * Dispatch mode, can be any of: > > + * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT > > + * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE > > + * GEN7_GS_DISPATCH_MODE_SINGLE > > + */ > > + int dispatch_mode; > > + > > + /** > > + * Gen6 transform feedback enabled flag. > > + */ > > + bool gen6_xfb_enabled; > > + > > + /** > > + * Gen6: Provoking vertex convention for odd-numbered triangles > > + * in tristrips. 
> > + */ > > + GLuint pv_first:1; > > + > > + /** > > + * Gen6: Number of varyings that are output to transform feedback. > > + */ > > + GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */ > > + > > + /** > > + * Gen6: Map from the index of a transform feedback binding table entry > > to the > > + * gl_varying_slot that should be streamed out through that binding > > table > > + * entry. > > + */ > > + unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS]; > > + > > + /** > > + * Gen6: Map from the index of a transform feedback binding table entry > > to the > > + * swizzles that should be used when streaming out data through that > > + * binding table entry. > > + */ > > + unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS]; > > +}; > > + > > /** > > * Stride in bytes between shader_time entries. > > * > > diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp > > b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp > > index c1cfe75..b8eaa58 100644 > > --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp > > +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp > > @@ -97,6 +97,45 @@ gen6_gs_visitor::emit_prolog() > > this->prim_count = src_reg(this, glsl_type::uint_type); > > emit(MOV(dst_reg(this->prim_count), 0u)); > > > > + if (c->prog_data.gen6_xfb_enabled) { > > + const struct gl_transform_feedback_info *linked_xfb_info = > > + &this->shader_prog->LinkedTransformFeedback; > > + > > + /* Gen6 geometry shaders are required to ask for Streamed Vertex > > Buffer > > + * Indices values via FF_SYNC message, when Transform Feedback is > > + * enabled. > > + * > > + * To achieve this we buffer the Transform feedback outputs for each > > + * emitted vertex in xfb_output during operation. Then, when we have > > + * processed the last vertex (that is, at thread end time), we know > > all > > + * the required data for the FF_SYNC message header in order to > > receive > > + * the SVBI in the writeback. 
> > + * > > + * For each emitted vertex, xfb_output will hold > > + * num_transform_feedback_bindings data items plus one, which will > > + * indicate the end of the primitive. Next vertex's data comes right > > + * after. > > + */ > > + this->xfb_output = src_reg(this, > > + glsl_type::uint_type, > > + linked_xfb_info->NumOutputs * > > + c->gp->program.VerticesOut); > > + this->xfb_output_offset = src_reg(this, glsl_type::uint_type); > > + emit(MOV(dst_reg(this->xfb_output_offset), src_reg(0u))); > > + /* Create a virtual register to hold destination indices in SOL */ > > + this->destination_indices = src_reg(this, glsl_type::uvec4_type); > > + /* Create a virtual register to hold temporal values in SOL */ > > + this->sol_temp = src_reg(this, glsl_type::uvec4_type); > > What is the duration of liveness for sol_temp? > > Would it be better to generate a new temp in each function to help out > register allocation? >
Yes, it is better. I have made this change: create a new temp virtual register in every place it is needed (emit_thread_end(), xfb_write(), xfb_program()). Sam > -Jordan > > > + /* Create a virtual register to hold number of written primitives */ > > + this->sol_prim_written = src_reg(this, glsl_type::uint_type); > > + /* Create a virtual register to hold Streamed Vertex Buffer Indices > > */ > > + this->svbi = src_reg(this, glsl_type::uvec4_type); > > + /* Create a virtual register to hold max values of SVBI */ > > + this->max_svbi = src_reg(this, glsl_type::uvec4_type); > > + emit(MOV(dst_reg(this->max_svbi), > > + src_reg(retype(brw_vec1_grf(1, 4), BRW_REGISTER_TYPE_UD)))); > > + } > > + > > /* PrimitiveID is delivered in r0.1 of the thread payload. If the program > > * needs it we have to move it to a separate register where we can map > > * the attribute. > > @@ -134,6 +173,9 @@ gen6_gs_visitor::visit(ir_emit_vertex *) > > BRW_CONDITIONAL_L)); > > emit(IF(BRW_PREDICATE_NORMAL)); > > { > > + if (c->prog_data.gen6_xfb_enabled) > > + xfb_buffer_output(); > > + > > /* Buffer all output slots for this vertex in vertex_output */ > > for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) { > > /* We will handle PSIZ for each vertex at thread end time since it > > @@ -330,9 +372,21 @@ gen6_gs_visitor::emit_thread_end() > > emit(IF(BRW_PREDICATE_NORMAL)); > > { > > this->current_annotation = "gen6 thread end: ff_sync"; > > - emit(GS_OPCODE_FF_SYNC, > > - dst_reg(MRF, base_mrf), this->temp, this->prim_count, > > - brw_imm_ud(0u)); > > + > > + if (c->prog_data.gen6_xfb_enabled) { > > + emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES, > > + dst_reg(this->svbi), > > + this->vertex_count, > > + this->prim_count, > > + this->sol_temp); > > + emit(GS_OPCODE_FF_SYNC, > > + dst_reg(MRF, base_mrf), this->temp, this->prim_count, > > + this->svbi); > > + } else { > > + emit(GS_OPCODE_FF_SYNC, > > + dst_reg(MRF, base_mrf), this->temp, this->prim_count, > > + brw_imm_ud(0u)); > > + } 
> > > > /* Loop over all buffered vertices and emit URB write messages */ > > this->current_annotation = "gen6 thread end: urb writes init"; > > @@ -412,6 +466,9 @@ gen6_gs_visitor::emit_thread_end() > > emit(ADD(dst_reg(vertex), vertex, 1u)); > > } > > emit(BRW_OPCODE_WHILE); > > + > > + if (c->prog_data.gen6_xfb_enabled) > > + xfb_write(); > > } > > emit(BRW_OPCODE_ENDIF); > > > > @@ -431,6 +488,15 @@ gen6_gs_visitor::emit_thread_end() > > * the EOT message. > > */ > > this->current_annotation = "gen6 thread end: EOT"; > > + > > + if (c->prog_data.gen6_xfb_enabled) { > > + /* When emitting EOT, set SONumPrimsWritten Increment Value. */ > > + src_reg data(this, glsl_type::uint_type); > > + emit(AND(dst_reg(data), this->sol_prim_written, > > brw_imm_ud(0xffffu))); > > + emit(SHL(dst_reg(data), data, brw_imm_ud(16u))); > > + emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data); > > + } > > + > > vec4_instruction *inst = emit(GS_OPCODE_THREAD_END); > > inst->urb_write_flags = BRW_URB_WRITE_COMPLETE | BRW_URB_WRITE_UNUSED; > > inst->base_mrf = base_mrf; > > @@ -478,4 +544,241 @@ gen6_gs_visitor::setup_payload() > > this->first_non_payload_grf = reg; > > } > > > > +void > > +gen6_gs_visitor::xfb_buffer_output() > > +{ > > + static const unsigned swizzle_for_offset[4] = { > > + BRW_SWIZZLE4(0, 1, 2, 3), > > + BRW_SWIZZLE4(1, 2, 3, 3), > > + BRW_SWIZZLE4(2, 3, 3, 3), > > + BRW_SWIZZLE4(3, 3, 3, 3) > > + }; > > + > > + struct brw_gs_prog_data *prog_data = > > + (struct brw_gs_prog_data *) &c->prog_data; > > + > > + if (!prog_data->num_transform_feedback_bindings) { > > + const struct gl_transform_feedback_info *linked_xfb_info = > > + &this->shader_prog->LinkedTransformFeedback; > > + int i; > > + > > + /* Make sure that the VUE slots won't overflow the unsigned chars in > > + * prog_data->transform_feedback_bindings[]. 
> > + */ > > + STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256); > > + > > + /* Make sure that we don't need more binding table entries than we've > > + * set aside for use in transform feedback. (We shouldn't, since we > > + * set aside enough binding table entries to have one per component). > > + */ > > + assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS); > > + > > + prog_data->num_transform_feedback_bindings = > > linked_xfb_info->NumOutputs; > > + for (i = 0; i < prog_data->num_transform_feedback_bindings; i++) { > > + prog_data->transform_feedback_bindings[i] = > > + linked_xfb_info->Outputs[i].OutputRegister; > > + prog_data->transform_feedback_swizzles[i] = > > + > > swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; > > + } > > + } > > + > > + /* Buffer all TF outputs for this vertex in xfb_output */ > > + for (int binding = 0; binding < > > prog_data->num_transform_feedback_bindings; > > + binding++) { > > + /* We will handle PSIZ for each vertex at thread end time since it > > + * is not computed by the GS algorithm and requires specific > > handling. 
> > + */ > > + unsigned varying = > > + prog_data->transform_feedback_bindings[binding]; > > + if (varying != VARYING_SLOT_PSIZ) { > > + dst_reg dst(this->xfb_output); > > + dst.reladdr = ralloc(mem_ctx, src_reg); > > + memcpy(dst.reladdr, &this->xfb_output_offset, sizeof(src_reg)); > > + dst.type = output_reg[varying].type; > > + > > + this->current_annotation = output_reg_annotation[varying]; > > + src_reg out_reg = src_reg(output_reg[varying]); > > + out_reg.swizzle = prog_data->transform_feedback_swizzles[binding]; > > + emit(MOV(dst, out_reg)); > > + } > > + emit(ADD(dst_reg(this->xfb_output_offset), this->xfb_output_offset, > > 1u)); > > + } > > +} > > + > > +void > > +gen6_gs_visitor::xfb_write() > > +{ > > + unsigned num_verts; > > + struct brw_gs_prog_data *prog_data = > > + (struct brw_gs_prog_data *) &c->prog_data; > > + > > + if (!prog_data->num_transform_feedback_bindings) > > + return; > > + > > + switch (c->prog_data.output_topology) { > > + case _3DPRIM_POINTLIST: > > + num_verts = 1; > > + break; > > + case _3DPRIM_LINELIST: > > + case _3DPRIM_LINESTRIP: > > + case _3DPRIM_LINELOOP: > > + num_verts = 2; > > + break; > > + case _3DPRIM_TRILIST: > > + case _3DPRIM_TRIFAN: > > + case _3DPRIM_TRISTRIP: > > + case _3DPRIM_RECTLIST: > > + num_verts = 3; > > + break; > > + case _3DPRIM_QUADLIST: > > + case _3DPRIM_QUADSTRIP: > > + case _3DPRIM_POLYGON: > > + num_verts = 3; > > + break; > > + default: > > + unreachable("Unexpected primitive type in Gen6 SOL program."); > > + } > > + > > + this->current_annotation = "gen6 thread end: svb writes init"; > > + > > + emit(MOV(dst_reg(this->xfb_output_offset), 0u)); > > + emit(MOV(dst_reg(this->sol_prim_written), 0u)); > > + > > + /* Check that at least one primitive can be written > > + * > > + * Note: since we use the binding table to keep track of buffer offsets > > + * and stride, the GS doesn't need to keep track of a separate pointer > > + * into each buffer; it uses a single pointer which increments by 1 
for > > + * each vertex. So we use SVBI0 for this pointer, regardless of whether > > + * transform feedback is in interleaved or separate attribs mode. > > + */ > > + emit(ADD(dst_reg(this->sol_temp), this->svbi, brw_imm_ud(num_verts))); > > + > > + /* Compare SVBI calculated number with the maximum value, which is > > + * in R1.4 (previously saved in this->max_svbi) for gen6. > > + */ > > + emit(CMP(dst_null_d(), this->sol_temp, this->max_svbi, > > BRW_CONDITIONAL_LE)); > > + emit(IF(BRW_PREDICATE_NORMAL)); > > + { > > + struct src_reg destination_indices_uw = > > + retype(destination_indices, BRW_REGISTER_TYPE_UW); > > + > > + vec4_instruction *inst = emit(MOV(dst_reg(destination_indices_uw), > > + brw_imm_v(0x00020100))); /* (0, 1, > > 2) */ > > + inst->force_writemask_all = true; > > + > > + emit(ADD(dst_reg(this->destination_indices), > > + this->destination_indices, > > + this->svbi)); > > + } > > + emit(BRW_OPCODE_ENDIF); > > + > > + this->current_vertex = 0; > > + /* Make sure we do not emit more transform feedback data than the amount > > + * we have buffered. 
> > + */ > > + for (int i = 0; i < c->gp->program.VerticesOut; i++) { > > + emit(MOV(dst_reg(this->sol_temp), i)); > > + emit(CMP(dst_null_d(), this->sol_temp, this->vertex_count, > > + BRW_CONDITIONAL_L)); > > + emit(IF(BRW_PREDICATE_NORMAL)); > > + { > > + xfb_program(num_verts); > > + } > > + emit(BRW_OPCODE_ENDIF); > > + } > > +} > > + > > +void > > +gen6_gs_visitor::xfb_program(unsigned num_verts) > > +{ > > + struct brw_gs_prog_data *prog_data = > > + (struct brw_gs_prog_data *) &c->prog_data; > > + unsigned binding; > > + unsigned num_bindings = prog_data->num_transform_feedback_bindings; > > + > > + /* Check if we can write one primitive more */ > > + emit(ADD(dst_reg(this->sol_temp), this->sol_prim_written, 1u)); > > + emit(MUL(dst_reg(this->sol_temp), this->sol_temp, > > brw_imm_ud(num_verts))); > > + emit(ADD(dst_reg(this->sol_temp), this->sol_temp, this->svbi)); > > + emit(CMP(dst_null_d(), this->sol_temp, this->max_svbi, > > BRW_CONDITIONAL_LE)); > > + emit(IF(BRW_PREDICATE_NORMAL)); > > + { > > + if (this->current_vertex >= num_verts) > > + this->current_vertex = 0; > > + > > + /* Avoid overwriting MRF 1 as it is used as URB write message header > > */ > > + dst_reg mrf_reg(MRF, 2); > > + > > + this->current_annotation = "gen6: emit SOL vertex data"; > > + /* For each vertex, generate code to output each varying using the > > + * appropriate binding table entry. 
> > + */ > > + for (binding = 0; binding < num_bindings; ++binding) { > > + /* Set up the correct destination index for this vertex */ > > + vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX, > > + mrf_reg, > > + this->destination_indices); > > + inst->sol_vertex = this->current_vertex; > > + > > + unsigned char varying = > > + prog_data->transform_feedback_bindings[binding]; > > + > > + /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1: > > + * > > + * "Prior to End of Thread with a URB_WRITE, the kernel must > > + * ensure that all writes are complete by sending the final > > + * write as a committed write." > > + */ > > + bool final_write = binding == (unsigned) num_bindings - 1 && > > + this->current_vertex == num_verts - 1; > > + > > + /* Compute offset of this varying for the current vertex > > + * in xfb_output > > + */ > > + src_reg data(this->xfb_output); > > + data.reladdr = ralloc(mem_ctx, src_reg); > > + memcpy(data.reladdr, &this->xfb_output_offset, sizeof(src_reg)); > > + src_reg out_reg; > > + this->current_annotation = output_reg_annotation[varying]; > > + > > + if (varying == VARYING_SLOT_PSIZ) { > > + /* We did not buffer PSIZ, emit it directly here */ > > + out_reg = src_reg(output_reg[varying]); > > + out_reg.swizzle = BRW_SWIZZLE_WWWW; > > + } else { > > + /* Copy this varying to the appropriate message register */ > > + out_reg = src_reg(this, glsl_type::uvec4_type); > > + out_reg.type = output_reg[varying].type; > > + > > + data.type = output_reg[varying].type; > > + emit(MOV(dst_reg(out_reg), data)); > > + } > > + > > + /* Write data and send SVB Write */ > > + inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, out_reg, > > this->sol_temp); > > + inst->sol_binding = binding; > > + inst->sol_final_write = final_write; > > + > > + emit(ADD(dst_reg(this->xfb_output_offset), > > + this->xfb_output_offset, 1u)); > > + > > + if (final_write) { > > + /* This is the last vertex of the primitive, then increment > > + * SO num primitive 
counter and destination indices. > > + */ > > + emit(ADD(dst_reg(this->destination_indices), > > + this->destination_indices, > > + brw_imm_ud(num_verts))); > > + emit(ADD(dst_reg(this->sol_prim_written), > > + this->sol_prim_written, 1u)); > > + } > > + > > + } > > + this->current_vertex++; > > + this->current_annotation = NULL; > > + } > > + emit(BRW_OPCODE_ENDIF); > > +} > > + > > } /* namespace brw */ > > diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h > > b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h > > index 8d2386c..595a46f 100644 > > --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h > > +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h > > @@ -55,12 +55,26 @@ protected: > > virtual void setup_payload(); > > > > private: > > + void xfb_write(); > > + void xfb_buffer_output(); > > + void xfb_program(unsigned num_verts); > > + > > src_reg vertex_output; > > src_reg vertex_output_offset; > > src_reg temp; > > src_reg first_vertex; > > src_reg prim_count; > > src_reg primitive_id; > > + > > + /* Transform Feedback members */ > > + src_reg xfb_output; > > + src_reg xfb_output_offset; > > + src_reg sol_temp; > > + src_reg sol_prim_written; > > + src_reg svbi; > > + src_reg max_svbi; > > + src_reg destination_indices; > > + unsigned current_vertex; > > }; > > > > } /* namespace brw */ > > -- > > 1.9.1 > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
signature.asc
Description: This is a digitally signed message part
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev