On Mon, Sep 7, 2015 at 11:06 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > On Thu, Sep 3, 2015 at 1:48 AM, Kenneth Graunke <kenn...@whitecape.org> wrote: >> By performing the vertex counting in NIR, we're able to elide a ton of >> useless safety checks around every EmitVertex() call: >> >> total instructions in shared programs: 3952 -> 3720 (-5.87%) >> instructions in affected programs: 3491 -> 3259 (-6.65%) >> helped: 11 >> HURT: 0 >> >> Improves performance in Gl32GSCloth by 0.671742% +/- 0.142202% (n=621) >> on Haswell GT3e at 1024x768. >> >> This should also make it easier to implement Broadwell's "Static Vertex >> Count" feature someday. >> >> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> >> --- >> src/mesa/drivers/dri/i965/brw_nir.c | 5 ++++ >> src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp | 13 +++++++++-- >> src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 28 >> ++++++++++++----------- >> src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 28 >> ++++++++++++++--------- >> 4 files changed, 48 insertions(+), 26 deletions(-) >> >> There are a couple regressions left with this patch, which are actually bugs >> in the NIR control flow graph modification code. I wrote a fix, which IIRC >> got this to zero regressions. But I've found even more bugs in the NIR CFG >> code, so I'm working on yet more fixes. I'll sort that out before landing >> this, but figured I may as well send it out for review in the meantime - I've >> had these patches a long time. >> >> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c >> b/src/mesa/drivers/dri/i965/brw_nir.c >> index 8f3edc5..15e79ba 100644 >> --- a/src/mesa/drivers/dri/i965/brw_nir.c >> +++ b/src/mesa/drivers/dri/i965/brw_nir.c >> @@ -93,6 +93,11 @@ brw_create_nir(struct brw_context *brw, >> } >> nir_validate_shader(nir); >> >> + if (stage == MESA_SHADER_GEOMETRY) { >> + nir_lower_gs_intrinsics(nir); >> + nir_validate_shader(nir); >> + } >> + >> nir_lower_global_vars_to_local(nir); >> nir_validate_shader(nir); >> >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp >> index 8a8dd57..4f4e1e1 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp >> @@ -92,16 +92,25 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr >> *instr) >> src_reg src; >> >> switch (instr->intrinsic) { >> - case nir_intrinsic_emit_vertex: { >> + case nir_intrinsic_emit_vertex_with_counter: { >> + this->vertex_count = >> + retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD); >> int stream_id = instr->const_index[0]; >> gs_emit_vertex(stream_id); >> break; >> } >> >> - case nir_intrinsic_end_primitive: >> + case nir_intrinsic_end_primitive_with_counter: >> + this->vertex_count = >> + retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD); > > It seems kind of odd to me that we are seetting a vertex_count state > variable in this case and in the one above. Wouldn't it be better to > pass the vertex count in to end_primitive() and emit_vertex()? That > can be a refactor for another day though; after we delete the old > vec4_visitor code. > > Modulo getting the CF bugs fixed, this series is
And the comments on patch 3 of course. > Reviewed-by: Jason Ekstrand <jason.ekstr...@intel.com> > >> gs_end_primitive(); >> break; >> >> + case nir_intrinsic_set_vertex_count: >> + this->vertex_count = >> + retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD); >> + break; >> + >> case nir_intrinsic_load_invocation_id: { >> src_reg invocation_id = >> src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]); >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp >> index 019efec..70c0f8e 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp >> @@ -484,14 +484,6 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) >> if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0) >> return; >> >> - /* To ensure that we don't output more vertices than the shader specified >> - * using max_vertices, do the logic inside a conditional of the form "if >> - * (vertex_count < MAX)" >> - */ >> - unsigned num_output_vertices = c->gp->program.VerticesOut; >> - emit(CMP(dst_null_d(), this->vertex_count, >> - src_reg(num_output_vertices), BRW_CONDITIONAL_L)); >> - emit(IF(BRW_PREDICATE_NORMAL)); >> { >> /* If we're outputting 32 control data bits or less, then we can wait >> * until the shader is over to output them all. Otherwise we need to >> @@ -562,12 +554,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) >> this->current_annotation = "emit vertex: Stream control data >> bits"; >> set_stream_control_data_bits(stream_id); >> } >> - >> - this->current_annotation = "emit vertex: increment vertex count"; >> - emit(ADD(dst_reg(this->vertex_count), this->vertex_count, >> - src_reg(1u))); >> } >> - emit(BRW_OPCODE_ENDIF); >> >> this->current_annotation = NULL; >> } >> @@ -575,7 +562,22 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) >> void >> vec4_gs_visitor::visit(ir_emit_vertex *ir) >> { >> + /* To ensure that we don't output more vertices than the shader specified >> + * using max_vertices, do the logic inside a conditional of the form "if >> + * (vertex_count < MAX)" >> + */ >> + unsigned num_output_vertices = c->gp->program.VerticesOut; >> + emit(CMP(dst_null_d(), this->vertex_count, >> + src_reg(num_output_vertices), BRW_CONDITIONAL_L)); >> + emit(IF(BRW_PREDICATE_NORMAL)); >> + >> gs_emit_vertex(ir->stream_id()); >> + >> + this->current_annotation = "emit vertex: increment vertex count"; >> + emit(ADD(dst_reg(this->vertex_count), this->vertex_count, >> + src_reg(1u))); >> + >> + emit(BRW_OPCODE_ENDIF); >> } >> >> void >> diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp >> b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp >> index 68e443d..5cfff7b 100644 >> --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp >> +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp >> @@ -149,19 +149,29 @@ gen6_gs_visitor::emit_prolog() >> void >> gen6_gs_visitor::visit(ir_emit_vertex *ir) >> { >> + /* To ensure that we don't output more vertices than the shader specified >> + * using max_vertices, do the logic inside a conditional of the form "if >> + * (vertex_count < MAX)" >> + */ >> + unsigned num_output_vertices = c->gp->program.VerticesOut; >> + emit(CMP(dst_null_d(), this->vertex_count, >> + src_reg(num_output_vertices), BRW_CONDITIONAL_L)); >> + emit(IF(BRW_PREDICATE_NORMAL)); >> + >> gs_emit_vertex(ir->stream_id()); >> + >> + this->current_annotation = "emit vertex: increment vertex count"; >> + emit(ADD(dst_reg(this->vertex_count), this->vertex_count, >> + src_reg(1u))); >> + >> + emit(BRW_OPCODE_ENDIF); >> } >> + >> void >> gen6_gs_visitor::gs_emit_vertex(int stream_id) >> { >> this->current_annotation = "gen6 emit vertex"; >> - /* Honor max_vertex layout indication in geometry shader by ignoring any >> - * vertices coming after c->gp->program.VerticesOut. >> - */ >> - unsigned num_output_vertices = c->gp->program.VerticesOut; >> - emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices), >> - BRW_CONDITIONAL_L)); >> - emit(IF(BRW_PREDICATE_NORMAL)); >> + >> { >> /* Buffer all output slots for this vertex in vertex_output */ >> for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) { >> @@ -219,11 +229,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id) >> } >> emit(ADD(dst_reg(this->vertex_output_offset), >> this->vertex_output_offset, 1u)); >> - >> - /* Update vertex count */ >> - emit(ADD(dst_reg(this->vertex_count), this->vertex_count, 1u)); >> } >> - emit(BRW_OPCODE_ENDIF); >> } >> >> void >> -- >> 2.5.0 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev