Kenneth Graunke <kenn...@whitecape.org> writes: > On Friday, July 22, 2016 8:59:03 PM PDT Francisco Jerez wrote: >> The problem with the current approach is that driver output locations >> are represented as a linear offset within the nir_outputs array, which >> makes it rather difficult for the back-end to figure out what color >> output and index some nir_intrinsic_load/store_output was meant for, >> because the offset of a given output within the nir_output array is >> dependent on the type and size of all previously allocated outputs. >> Instead this defines the driver location of an output to be the pair >> formed by its GLSL-assigned location and index (I've borrowed the >> bitfield macros from brw_defines.h in order to represent the pair of >> integers as a single scalar value that can be assigned to >> nir_variable_data::driver_location). nir_assign_var_locations is no >> longer useful for fragment outputs. >> >> Because fragment outputs are now allocated independently rather than >> within the nir_outputs array, the get_frag_output() helper becomes >> necessary in order to obtain the right temporary register for a given >> location-index pair. >> >> The type_size helper passed to nir_lower_io is now type_size_dvec4 >> rather than type_size_vec4_times_4 so that output array offsets are >> provided in terms of whole array elements rather than in terms of >> scalar components (dvec4 is the largest vector type supported by the >> GLSL so this will cause all individual fragment outputs to have a size >> of one regardless of the type). > > I don't think you should need type_size_dvec4 - double-precision > fragment shader outputs don't exist. GL_ARB_gpu_shader_fp64 says: > > (3) Should double-precision fragment shader outputs be supported? > > RESOLVED: Not in this extension. Note that we don't have > double-precision framebuffer formats to accept such values. > > Why not just use type_size_vec4? With that changed, this patch would > get my Reviewed-by. >
Yeah, I'm aware of that restriction (c.f. PATCH 7), but the point here was to get output offsets counted in whole array elements regardless of the type, type_size_dvec4 is strictly better for that purpose than type_size_vec4, because dvec4 is the largest GLSL vector type, and they are otherwise equivalent when used on types smaller than a dvec4. A type_size_vectors helper may make sense instead though. >> --- >> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 50 >> +++++++++++++++++++++++++++----- >> src/mesa/drivers/dri/i965/brw_nir.c | 10 +++++-- >> src/mesa/drivers/dri/i965/brw_nir.h | 5 ++++ >> 3 files changed, 55 insertions(+), 10 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> index 28de29a..8e069e0 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> @@ -89,17 +89,19 @@ fs_visitor::nir_setup_outputs() >> nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs); >> >> nir_foreach_variable(var, &nir->outputs) { >> - fs_reg reg = offset(nir_outputs, bld, var->data.driver_location); >> - >> switch (stage) { >> case MESA_SHADER_VERTEX: >> case MESA_SHADER_TESS_EVAL: >> case MESA_SHADER_GEOMETRY: { >> + fs_reg reg = offset(nir_outputs, bld, var->data.driver_location); >> unsigned location = var->data.location; >> nir_setup_single_output_varying(®, var->type, &location); >> break; >> } >> - case MESA_SHADER_FRAGMENT: >> + case MESA_SHADER_FRAGMENT: { >> + const fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, >> + type_size_vec4_times_4(var->type)); >> + >> if (key->force_dual_color_blend && >> var->data.location == FRAG_RESULT_DATA1) { >> this->dual_src_output = reg; >> @@ -130,6 +132,7 @@ fs_visitor::nir_setup_outputs() >> } >> } >> break; >> + } >> default: >> unreachable("unhandled shader stage"); >> } >> @@ -3244,6 +3247,38 @@ emit_non_coherent_fb_read(fs_visitor *v, const fs_reg >> &dst, unsigned target) >> return inst; >> } >> >> +static fs_reg >> +get_frag_output(const fs_visitor *v, unsigned location) >> +{ >> + assert(v->stage == MESA_SHADER_FRAGMENT); >> + const brw_wm_prog_key *const key = >> + reinterpret_cast<const brw_wm_prog_key *>(v->key); >> + const unsigned l = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_LOCATION); >> + const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX); >> + >> + if (i > 0 || (key->force_dual_color_blend && l == FRAG_RESULT_DATA1)) >> + return v->dual_src_output; >> + >> + else if (l == FRAG_RESULT_COLOR) >> + return v->outputs[0]; >> + >> + else if (l == FRAG_RESULT_DEPTH) >> + return v->frag_depth; >> + >> + else if (l == FRAG_RESULT_STENCIL) >> + return v->frag_stencil; >> + >> + else if (l == FRAG_RESULT_SAMPLE_MASK) >> + return v->sample_mask; >> + >> + else if (l >= FRAG_RESULT_DATA0 && >> + l < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS) >> + return v->outputs[l - FRAG_RESULT_DATA0]; >> + >> + else >> + unreachable("Invalid location"); >> +} >> + >> void >> fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, >> nir_intrinsic_instr *instr) >> @@ -3282,11 +3317,12 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder >> &bld, >> >> case nir_intrinsic_store_output: { >> const fs_reg src = get_nir_src(instr->src[0]); >> - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); >> + const nir_const_value *const_offset = >> nir_src_as_const_value(instr->src[1]); >> assert(const_offset && "Indirect output stores not allowed"); >> - const fs_reg new_dest = offset(retype(nir_outputs, src.type), bld, >> - nir_intrinsic_base(instr) + >> - const_offset->u32[0]); >> + const unsigned location = nir_intrinsic_base(instr) + >> + SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION); >> + const fs_reg new_dest = retype(get_frag_output(this, location), >> + src.type); >> >> for (unsigned j = 0; j < instr->num_components; j++) >> bld.MOV(offset(new_dest, bld, nir_intrinsic_component(instr) + j), >> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c >> b/src/mesa/drivers/dri/i965/brw_nir.c >> index 388ae9e..ee58115 100644 >> --- a/src/mesa/drivers/dri/i965/brw_nir.c >> +++ b/src/mesa/drivers/dri/i965/brw_nir.c >> @@ -339,9 +339,13 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, const struct >> brw_vue_map *vue_map) >> void >> brw_nir_lower_fs_outputs(nir_shader *nir) >> { >> - nir_assign_var_locations(&nir->outputs, &nir->num_outputs, >> - FRAG_RESULT_DATA0, type_size_vec4_times_4); >> - nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4); >> + nir_foreach_variable(var, &nir->outputs) { >> + var->data.driver_location = >> + SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) | >> + SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION); >> + } >> + >> + nir_lower_io(nir, nir_var_shader_out, type_size_dvec4); >> } >> >> void >> diff --git a/src/mesa/drivers/dri/i965/brw_nir.h >> b/src/mesa/drivers/dri/i965/brw_nir.h >> index 74c354f..be97d8f 100644 >> --- a/src/mesa/drivers/dri/i965/brw_nir.h >> +++ b/src/mesa/drivers/dri/i965/brw_nir.h >> @@ -137,6 +137,11 @@ void brw_nir_setup_arb_uniforms(nir_shader *shader, >> struct gl_program *prog, >> >> bool brw_nir_opt_peephole_ffma(nir_shader *shader); >> >> +#define BRW_NIR_FRAG_OUTPUT_INDEX_SHIFT 0 >> +#define BRW_NIR_FRAG_OUTPUT_INDEX_MASK INTEL_MASK(0, 0) >> +#define BRW_NIR_FRAG_OUTPUT_LOCATION_SHIFT 1 >> +#define BRW_NIR_FRAG_OUTPUT_LOCATION_MASK INTEL_MASK(31, 1) >> + >> #ifdef __cplusplus >> } >> #endif >>
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev