If a non-const sample number is given to interpolateAtSample it will now generate an indirect send message with the sample ID similar to how non-const sampler array indexing works. Previously non-const values were ignored and instead it ended up using a constant 0 value.
The generator will try to determine if the sample ID is dynamically uniform via nir_src_is_dynamically_uniform. If not it will query the pixel interpolator in a loop, once for each possible sample number. This is necessary because the indirect send message doesn't seem to have a way to specify a different value for each fragment. The range of possible sample numbers is determined using STATE_NUM_SAMPLES. When linking the shader it will now add a reference to this state if any dynamically non-uniform calls to interpolateAtSample are found. This fixes the following two Piglit tests: arb_gpu_shader5-interpolateAtSample-nonconst arb_gpu_shader5-interpolateAtSample-dynamically-nonuniform v2: Handle dynamically non-uniform sample ids. --- src/mesa/drivers/dri/i965/brw_eu.h | 2 +- src/mesa/drivers/dri/i965/brw_eu_emit.c | 34 ++++--- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 119 +++++++++++++++++++++---- src/mesa/drivers/dri/i965/brw_program.c | 54 +++++++++++ src/mesa/drivers/dri/i965/brw_program.h | 1 + src/mesa/drivers/dri/i965/brw_shader.cpp | 2 + 7 files changed, 185 insertions(+), 32 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 761aa0e..0ac1ad9 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p, struct brw_reg mrf, bool noperspective, unsigned mode, - unsigned data, + struct brw_reg data, unsigned msg_length, unsigned response_length); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 4d39762..25524d4 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -3192,26 +3192,38 @@ brw_pixel_interpolator_query(struct brw_codegen *p, struct brw_reg mrf, bool noperspective, unsigned mode, - unsigned data, + struct brw_reg data, unsigned msg_length, unsigned response_length) { const struct brw_device_info *devinfo = p->devinfo; - struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); + struct brw_inst *insn; + uint16_t exec_size; - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, mrf); - brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR, - msg_length, response_length, - false /* header is never present for PI */, - false); + if (data.file == BRW_IMMEDIATE_VALUE) { + insn = next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, mrf); + brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR, + msg_length, response_length, + false /* header is never present for PI */, + false); + brw_inst_set_pi_message_data(devinfo, insn, data.dw1.ud); + } else { + insn = brw_send_indirect_message(p, + GEN7_SFID_PIXEL_INTERPOLATOR, + dest, + mrf, + vec1(data)); + brw_inst_set_mlen(devinfo, insn, msg_length); + brw_inst_set_rlen(devinfo, insn, response_length); + } - brw_inst_set_pi_simd_mode( - devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16); + exec_size = brw_inst_exec_size(devinfo, p->current); + brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16); brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */ brw_inst_set_pi_nopersp(devinfo, insn, noperspective); brw_inst_set_pi_message_type(devinfo, insn, mode); - brw_inst_set_pi_message_data(devinfo, insn, data); } void diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index c86ca04..88dbc62 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1328,15 +1328,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst, struct brw_reg msg_data, unsigned msg_type) { - assert(msg_data.file == BRW_IMMEDIATE_VALUE && - msg_data.type == BRW_REGISTER_TYPE_UD); + assert(msg_data.type == BRW_REGISTER_TYPE_UD); brw_pixel_interpolator_query(p, retype(dst, BRW_REGISTER_TYPE_UW), src, inst->pi_noperspective, msg_type, - msg_data.dw1.ud, + msg_data, inst->mlen, inst->regs_written); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index ee964a0..0fdb9ae 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1167,6 +1167,47 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst, } } +/* For most messages, we need one reg of ignored data; the hardware requires + * mlen==1 even when there is no payload. in the per-slot offset case, we'll + * replace this with the proper source data. + */ +static void +setup_pixel_interpolater_instruction(fs_visitor *v, + nir_intrinsic_instr *instr, + fs_inst *inst, + int mlen = 1) +{ + inst->mlen = mlen; + /* 2 floats per slot returned */ + inst->regs_written = 2 * v->dispatch_width / 8; + inst->pi_noperspective = instr->variables[0]->var->data.interpolation == + INTERP_QUALIFIER_NOPERSPECTIVE; +} + +static fs_reg +get_num_samples_reg(fs_visitor *v) +{ + struct gl_program_parameter_list *params = v->prog->Parameters; + static gl_state_index tokens[STATE_LENGTH] = { + STATE_NUM_SAMPLES + }; + GLuint index = _mesa_add_state_reference(params, tokens); + unsigned i; + + /* Try to find an existing copy of the uniform */ + for (i = 0; i < v->uniforms; i++) { + if (v->stage_prog_data->param[i] == + &v->prog->Parameters->ParameterValues[index][0]) + goto found; + } + + v->stage_prog_data->param[v->uniforms++] = + &v->prog->Parameters->ParameterValues[index][0]; + +found: + return retype(fs_reg(UNIFORM, i), BRW_REGISTER_TYPE_UD); +} + void fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) { @@ -1438,27 +1479,73 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2); - /* For most messages, we need one reg of ignored data; the hardware - * requires mlen==1 even when there is no payload. in the per-slot - * offset case, we'll replace this with the proper source data. - */ fs_reg src = vgrf(glsl_type::float_type); - int mlen = 1; /* one reg unless overriden */ fs_inst *inst; switch (instr->intrinsic) { case nir_intrinsic_interp_var_at_centroid: inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u)); + setup_pixel_interpolater_instruction(this, instr, inst); break; case nir_intrinsic_interp_var_at_sample: { - /* XXX: We should probably handle non-constant sample id's */ nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); - assert(const_sample); - unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0; - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, - fs_reg(msg_data)); + + if (const_sample) { + unsigned msg_data = const_sample->i[0] << 4; + + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, + fs_reg(msg_data)); + + setup_pixel_interpolater_instruction(this, instr, inst); + } else { + fs_reg sample_src = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_UD); + fs_reg sample_id_reg; + + if (nir_src_is_dynamically_uniform(instr->src[0])) { + sample_id_reg = vgrf(glsl_type::uint_type); + bld.SHL(sample_id_reg, sample_src, fs_reg(4u)); + sample_id_reg = bld.emit_uniformize(sample_id_reg); + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, + sample_id_reg); + setup_pixel_interpolater_instruction(this, instr, inst); + } else { + /* Make a loop that sends a message to the pixel interpolator + * for each possible sample number so that each individual + * message will be dynamically uniform. The number of samples + * is determined by accessing the STATE_NUM_SAMPLES state var. + */ + fs_reg i_reg = vgrf(glsl_type::uint_type); + fs_reg sample_id_reg = vgrf(glsl_type::uint_type); + fs_reg num_samples_reg = get_num_samples_reg(this); + + bld.MOV(i_reg, fs_reg(0u)); + + bld.emit(BRW_OPCODE_DO); + + bld.CMP(bld.null_reg_ud(), + sample_src, i_reg, + BRW_CONDITIONAL_EQ); + bld.IF(BRW_PREDICATE_NORMAL); + bld.SHL(sample_id_reg, i_reg, fs_reg(4u)); + sample_id_reg = bld.emit_uniformize(sample_id_reg); + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, + sample_id_reg); + setup_pixel_interpolater_instruction(this, instr, inst); + bld.emit(BRW_OPCODE_ENDIF); + + bld.ADD(i_reg, i_reg, fs_reg(1u)); + bld.CMP(bld.null_reg_ud(), + i_reg, num_samples_reg, + BRW_CONDITIONAL_GE); + inst = bld.emit(BRW_OPCODE_BREAK); + inst->predicate = BRW_PREDICATE_NORMAL; + bld.emit(BRW_OPCODE_WHILE); + } + } + break; } @@ -1471,6 +1558,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src, fs_reg(off_x | (off_y << 4))); + setup_pixel_interpolater_instruction(this, instr, inst); } else { src = vgrf(glsl_type::ivec2_type); fs_reg offset_src = retype(get_nir_src(instr->src[0]), @@ -1500,9 +1588,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr bld.SEL(offset(src, bld, i), itemp, fs_reg(7))); } - mlen = 2 * dispatch_width / 8; inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src, fs_reg(0u)); + setup_pixel_interpolater_instruction(this, + instr, + inst, + 2 * dispatch_width / 8); } break; } @@ -1511,12 +1602,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr unreachable("Invalid intrinsic"); } - inst->mlen = mlen; - /* 2 floats per slot returned */ - inst->regs_written = 2 * dispatch_width / 8; - inst->pi_noperspective = instr->variables[0]->var->data.interpolation == - INTERP_QUALIFIER_NOPERSPECTIVE; - for (unsigned j = 0; j < instr->num_components; j++) { fs_reg src = interp_reg(instr->variables[0]->var->data.location, j); src.type = dest.type; diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 467a893..6430c5d 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -146,6 +146,8 @@ brwProgramStringNotify(struct gl_context *ctx, prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true); } + brw_add_interpolate_at_sample_params(prog); + brw_fs_precompile(ctx, NULL, prog); break; } @@ -246,6 +248,58 @@ brw_add_texrect_params(struct gl_program *prog) } } +static bool +find_interpolate_at_sample_in_block(nir_block *block, + void *data) +{ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrinsic_instr = nir_instr_as_intrinsic(instr); + + if (intrinsic_instr->intrinsic != nir_intrinsic_interp_var_at_sample) + continue; + + /* If the sample number is known to be dynamically uniform then + * the generator won't need the num_samples state. + */ + if (nir_src_is_dynamically_uniform(intrinsic_instr->src[0])) + continue; + + return false; + } + + return true; +} + +void +brw_add_interpolate_at_sample_params(struct gl_program *prog) +{ + static gl_state_index tokens[STATE_LENGTH] = { + STATE_NUM_SAMPLES + }; + + if (!prog->nir) + return; + + /* If anything calls interpolateAtSample with a dynamically non-uniform + * sample ID then we need STATE_NUM_SAMPLES to be able to iterate over + * each possible value. + */ + nir_foreach_overload(prog->nir, overload) { + if (overload->impl) { + bool found = !nir_foreach_block(overload->impl, + find_interpolate_at_sample_in_block, + ralloc_parent(overload->impl)); + if (found) { + _mesa_add_state_reference(prog->Parameters, tokens); + break; + } + } + } +} + /* Per-thread scratch space is a power-of-two multiple of 1KB. */ int brw_get_scratch_size(int size) diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h index eaa7e4e..c41ee96 100644 --- a/src/mesa/drivers/dri/i965/brw_program.h +++ b/src/mesa/drivers/dri/i965/brw_program.h @@ -167,6 +167,7 @@ bool brw_debug_recompile_sampler_key(struct brw_context *brw, const struct brw_sampler_prog_key_data *old_key, const struct brw_sampler_prog_key_data *key); void brw_add_texrect_params(struct gl_program *prog); +void brw_add_interpolate_at_sample_params(struct gl_program *prog); void brw_mark_surface_used(struct brw_stage_prog_data *prog_data, diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index bccf8d6..c76f4f7 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -407,6 +407,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) is_scalar_shader_stage(brw, stage)); } + brw_add_interpolate_at_sample_params(prog); + _mesa_reference_program(ctx, &prog, NULL); } -- 1.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev