Random side-note: this half-fixes one of my gripes with blorp-nir. Now if only I could figure out how to fix it the rest of the way... --Jason
On Wed, May 18, 2016 at 3:00 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > This handles gl_FragCoord transformations and other window system vs. > user FBO coordinate system flipping by multiplying/adding uniform > values, rather than recompiles. > > This is much better because we have no decent way to guess whether > the application is going to use a shader with the window system FBO > or a user FBO, much less the drawable height. This led to a lot of > recompiles in many applications. > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/intel/vulkan/anv_pipeline.c | 5 +++++ > src/mesa/drivers/dri/i965/brw_defines.h | 1 - > src/mesa/drivers/dri/i965/brw_fs.cpp | 25 > +++---------------------- > src/mesa/drivers/dri/i965/brw_fs.h | 6 ++---- > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 8 ++++---- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 22 +++++++--------------- > src/mesa/drivers/dri/i965/brw_nir.c | 13 +++++++++++++ > src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 16 +++++----------- > 8 files changed, 39 insertions(+), 57 deletions(-) > > diff --git a/src/intel/vulkan/anv_pipeline.c > b/src/intel/vulkan/anv_pipeline.c > index faa0adb..a9cecd6 100644 > --- a/src/intel/vulkan/anv_pipeline.c > +++ b/src/intel/vulkan/anv_pipeline.c > @@ -142,6 +142,11 @@ anv_shader_compile_to_nir(struct anv_device *device, > > free(spec_entries); > > + if (stage == MESA_SHADER_FRAGMENT) { > + nir_lower_wpos_center(nir); > + nir_validate_shader(nir); > + } > + > nir_lower_returns(nir); > nir_validate_shader(nir); > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 3395c9b..666dfd9 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -1103,7 +1103,6 @@ enum opcode { > FS_OPCODE_DDX_FINE, > /** > * Compute dFdy(), dFdyCoarse(), or dFdyFine(). > - * src1 is an immediate storing the key->render_to_fbo boolean. > */ > FS_OPCODE_DDY_COARSE, > FS_OPCODE_DDY_FINE, > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 65b64b6..7b761f0 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -1058,37 +1058,18 @@ fs_visitor::import_uniforms(fs_visitor *v) > } > > fs_reg * > -fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, > - bool origin_upper_left) > +fs_visitor::emit_fragcoord_interpolation() > { > assert(stage == MESA_SHADER_FRAGMENT); > - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; > fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::vec4_type)); > fs_reg wpos = *reg; > - bool flip = !origin_upper_left ^ key->render_to_fbo; > > /* gl_FragCoord.x */ > - if (pixel_center_integer) { > - bld.MOV(wpos, this->pixel_x); > - } else { > - bld.ADD(wpos, this->pixel_x, brw_imm_f(0.5f)); > - } > + bld.MOV(wpos, this->pixel_x); > wpos = offset(wpos, bld, 1); > > /* gl_FragCoord.y */ > - if (!flip && pixel_center_integer) { > - bld.MOV(wpos, this->pixel_y); > - } else { > - fs_reg pixel_y = this->pixel_y; > - float offset = (pixel_center_integer ? 0.0f : 0.5f); > - > - if (flip) { > - pixel_y.negate = true; > - offset += key->drawable_height - 1.0f; > - } > - > - bld.ADD(wpos, pixel_y, brw_imm_f(offset)); > - } > + bld.MOV(wpos, this->pixel_y); > wpos = offset(wpos, bld, 1); > > /* gl_FragCoord.z */ > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index ac270cd..bcfe032 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -183,8 +183,7 @@ public: > > void emit_dummy_fs(); > void emit_repclear_shader(); > - fs_reg *emit_fragcoord_interpolation(bool pixel_center_integer, > - bool origin_upper_left); > + fs_reg *emit_fragcoord_interpolation(); > fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp, > glsl_interp_qualifier interpolation_mode, > bool is_centroid, bool is_sample); > @@ -456,8 +455,7 @@ private: > struct brw_reg dst, > struct brw_reg src); > void generate_ddx(enum opcode op, struct brw_reg dst, struct brw_reg > src); > - void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg > src, > - bool negate_value); > + void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg > src); > void generate_scratch_write(fs_inst *inst, struct brw_reg src); > void generate_scratch_read(fs_inst *inst, struct brw_reg dst); > void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst); > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index b9000d6..ed790b5 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -1099,9 +1099,10 @@ fs_generator::generate_ddx(enum opcode opcode, > */ > void > fs_generator::generate_ddy(enum opcode opcode, > - struct brw_reg dst, struct brw_reg src, > - bool negate_value) > + struct brw_reg dst, struct brw_reg src) > { > + bool negate_value = true; > + > if (opcode == FS_OPCODE_DDY_FINE) { > /* From the Ivy Bridge PRM, volume 4 part 3, section 3.3.9 (Register > * Region Restrictions): > @@ -2140,8 +2141,7 @@ fs_generator::generate_code(const cfg_t *cfg, int > dispatch_width) > break; > case FS_OPCODE_DDY_COARSE: > case FS_OPCODE_DDY_FINE: > - assert(src[1].file == BRW_IMMEDIATE_VALUE); > - generate_ddy(inst->opcode, dst, src[0], src[1].ud); > + generate_ddy(inst->opcode, dst, src[0]); > break; > > case SHADER_OPCODE_GEN4_SCRATCH_WRITE: > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index ebcc92a..a3ff8d4 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -62,8 +62,7 @@ fs_visitor::nir_setup_inputs() > > fs_reg reg; > if (var->data.location == VARYING_SLOT_POS) { > - reg = > *emit_fragcoord_interpolation(var->data.pixel_center_integer, > - var->data.origin_upper_left); > + reg = *emit_fragcoord_interpolation(); > emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(), > input, reg), 0xF); > } else if (var->data.location == VARYING_SLOT_LAYER) { > @@ -879,22 +878,18 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, > nir_alu_instr *instr) > break; > case nir_op_fddy: > if (fs_key->high_quality_derivatives) { > - inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0], > - brw_imm_d(fs_key->render_to_fbo)); > + inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0]); > } else { > - inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0], > - brw_imm_d(fs_key->render_to_fbo)); > + inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0]); > } > inst->saturate = instr->dest.saturate; > break; > case nir_op_fddy_fine: > - inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0], > - brw_imm_d(fs_key->render_to_fbo)); > + inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0]); > inst->saturate = instr->dest.saturate; > break; > case nir_op_fddy_coarse: > - inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0], > - brw_imm_d(fs_key->render_to_fbo)); > + inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0]); > inst->saturate = instr->dest.saturate; > break; > > @@ -3064,12 +3059,9 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder > &bld, > case nir_intrinsic_interp_var_at_offset: { > nir_const_value *const_offset = > nir_src_as_const_value(instr->src[0]); > > - const bool flip = !wm_key->render_to_fbo; > - > if (const_offset) { > unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & > 0xf; > - unsigned off_y = MIN2((int)(const_offset->f32[1] * 16 * > - (flip ? -1 : 1)), 7) & 0xf; > + unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & > 0xf; > > emit_pixel_interpolater_send(bld, > > FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, > @@ -3086,7 +3078,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder > &bld, > bld.MUL(temp, offset(offset_src, bld, i), > brw_imm_f(16.0f)); > fs_reg itemp = vgrf(glsl_type::int_type); > /* float to int */ > - bld.MOV(itemp, (i == 1 && flip) ? negate(temp) : temp); > + bld.MOV(itemp, temp); > > /* Clamp the upper end of the range to +7/16. > * ARB_gpu_shader5 requires that we support a maximum > offset > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c > b/src/mesa/drivers/dri/i965/brw_nir.c > index 9afd036..372b746 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir.c > +++ b/src/mesa/drivers/dri/i965/brw_nir.c > @@ -27,6 +27,7 @@ > #include "compiler/nir/glsl_to_nir.h" > #include "compiler/nir/nir_builder.h" > #include "program/prog_to_nir.h" > +#include "program/prog_parameter.h" > > static bool > is_input(nir_intrinsic_instr *intrin) > @@ -573,6 +574,18 @@ brw_create_nir(struct brw_context *brw, > > nir = brw_preprocess_nir(brw->intelScreen->compiler, nir); > > + if (stage == MESA_SHADER_FRAGMENT) { > + static const struct nir_lower_wpos_ytransform_options wpos_options > = { > + .state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, > 0, 0}, > + .fs_coord_pixel_center_integer = 1, > + .fs_coord_origin_upper_left = 1, > + }; > + _mesa_add_state_reference(prog->Parameters, > + (gl_state_index *) > wpos_options.state_tokens); > + > + OPT(nir_lower_wpos_ytransform, &wpos_options); > + } > + > OPT(nir_lower_system_values); > OPT_V(brw_nir_lower_uniforms, is_scalar); > > diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > index 15d99fa..b752ad5 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > @@ -157,17 +157,11 @@ brw_nir_setup_arb_uniforms(nir_shader *shader, > struct gl_program *prog, > { > struct gl_program_parameter_list *plist = prog->Parameters; > > -#ifndef NDEBUG > - if (!shader->uniforms.is_empty()) { > - /* For ARB programs, only a single "parameters" variable is > generated to > - * support uniform data. > - */ > - assert(shader->uniforms.length() == 1); > - nir_variable *var = (nir_variable *) shader->uniforms.get_head(); > - assert(strcmp(var->name, "parameters") == 0); > - assert(var->type->array_size() == (int)plist->NumParameters); > - } > -#endif > + /* For ARB programs, prog_to_nir generates a single "parameters" > variable > + * for all uniform data. nir_lower_wpos_ytransform may also create an > + * additional variable. > + */ > + assert(shader->uniforms.length() <= 2); > > for (unsigned p = 0; p < plist->NumParameters; p++) { > /* Parameters should be either vec4 uniforms or single component > -- > 2.8.2 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev