Matt Turner <matts...@gmail.com> writes: > On Fri, Aug 7, 2015 at 9:31 AM, Neil Roberts <n...@linux.intel.com> wrote: >> If a non-const sample number is given to interpolateAtSample it will >> now generate an indirect send message with the sample ID similar to >> how non-const sampler array indexing works. Previously non-const >> values were ignored and instead it ended up using a constant 0 value. >> >> The generator will try to determine if the sample ID is dynamically >> uniform via nir_src_is_dynamically_uniform. If not it will query the >> pixel interpolator in a loop, once for each possible sample number. >> This is necessary because the indirect send message doesn't seem to >> have a way to specify a different value for each fragment. > > Heh, I think this was the solution Curro had in mind a couple of years ago. :) > > I've Cc'd him. It'd be good for him to review this. > Sigh, it's really awful that our hardware only supports a single sample index for the whole SIMD thread... I was thinking though that there might be a better alternative to running the sample-index interpolator query in a loop: The "Per Slot Offset" interpolator query does allow independent offsets for each channel, couldn't we pass the hard-coded table of sample offsets (see brw_multisample_state.h) to the shader (e.g. as push constants or hard-coded as vector immediates), use VxH indirect indexing to fetch the right offset for each channel and then lower it into a non-dynamically-uniform interpolateAtOffset, which the hardware can handle natively?
>> The range of possible sample numbers is determined using >> STATE_NUM_SAMPLES. When linking the shader it will now add a reference >> to this state if any dynamically non-uniform calls to >> interpolateAtSample are found. >> >> This fixes the following two Piglit tests: >> >> arb_gpu_shader5-interpolateAtSample-nonconst >> arb_gpu_shader5-interpolateAtSample-dynamically-nonuniform >> >> v2: Handle dynamically non-uniform sample ids. >> --- >> src/mesa/drivers/dri/i965/brw_eu.h | 2 +- >> src/mesa/drivers/dri/i965/brw_eu_emit.c | 34 ++++--- >> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 +- >> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 119 >> +++++++++++++++++++++---- >> src/mesa/drivers/dri/i965/brw_program.c | 54 +++++++++++ >> src/mesa/drivers/dri/i965/brw_program.h | 1 + >> src/mesa/drivers/dri/i965/brw_shader.cpp | 2 + >> 7 files changed, 185 insertions(+), 32 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h >> b/src/mesa/drivers/dri/i965/brw_eu.h >> index 761aa0e..0ac1ad9 100644 >> --- a/src/mesa/drivers/dri/i965/brw_eu.h >> +++ b/src/mesa/drivers/dri/i965/brw_eu.h >> @@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p, >> struct brw_reg mrf, >> bool noperspective, >> unsigned mode, >> - unsigned data, >> + struct brw_reg data, >> unsigned msg_length, >> unsigned response_length); >> >> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c >> b/src/mesa/drivers/dri/i965/brw_eu_emit.c >> index 4d39762..25524d4 100644 >> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c >> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c >> @@ -3192,26 +3192,38 @@ brw_pixel_interpolator_query(struct brw_codegen *p, >> struct brw_reg mrf, >> bool noperspective, >> unsigned mode, >> - unsigned data, >> + struct brw_reg data, >> unsigned msg_length, >> unsigned response_length) >> { >> const struct brw_device_info *devinfo = p->devinfo; >> - struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); >> + struct brw_inst *insn; >> + uint16_t exec_size; >> >> - brw_set_dest(p, insn, dest); >> - brw_set_src0(p, insn, mrf); >> - brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR, >> - msg_length, response_length, >> - false /* header is never present for PI */, >> - false); >> + if (data.file == BRW_IMMEDIATE_VALUE) { >> + insn = next_insn(p, BRW_OPCODE_SEND); >> + brw_set_dest(p, insn, dest); >> + brw_set_src0(p, insn, mrf); >> + brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR, >> + msg_length, response_length, >> + false /* header is never present for PI */, >> + false); >> + brw_inst_set_pi_message_data(devinfo, insn, data.dw1.ud); >> + } else { >> + insn = brw_send_indirect_message(p, >> + GEN7_SFID_PIXEL_INTERPOLATOR, >> + dest, >> + mrf, >> + vec1(data)); >> + brw_inst_set_mlen(devinfo, insn, msg_length); >> + brw_inst_set_rlen(devinfo, insn, response_length); >> + } >> >> - brw_inst_set_pi_simd_mode( >> - devinfo, insn, brw_inst_exec_size(devinfo, insn) == >> BRW_EXECUTE_16); >> + exec_size = brw_inst_exec_size(devinfo, p->current); >> + brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16); >> brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px >> dispatch */ >> brw_inst_set_pi_nopersp(devinfo, insn, noperspective); >> brw_inst_set_pi_message_type(devinfo, insn, mode); >> - brw_inst_set_pi_message_data(devinfo, insn, data); >> } >> >> void >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> index c86ca04..88dbc62 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> @@ -1328,15 +1328,14 @@ >> fs_generator::generate_pixel_interpolator_query(fs_inst *inst, >> struct brw_reg msg_data, >> unsigned msg_type) >> { >> - assert(msg_data.file == BRW_IMMEDIATE_VALUE && >> - msg_data.type == BRW_REGISTER_TYPE_UD); >> + assert(msg_data.type == BRW_REGISTER_TYPE_UD); >> >> brw_pixel_interpolator_query(p, >> retype(dst, BRW_REGISTER_TYPE_UW), >> src, >> inst->pi_noperspective, >> msg_type, >> - msg_data.dw1.ud, >> + msg_data, >> inst->mlen, >> inst->regs_written); >> } >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> index ee964a0..0fdb9ae 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> @@ -1167,6 +1167,47 @@ fs_visitor::emit_percomp(const fs_builder &bld, const >> fs_inst &inst, >> } >> } >> >> +/* For most messages, we need one reg of ignored data; the hardware requires >> + * mlen==1 even when there is no payload. in the per-slot offset case, we'll >> + * replace this with the proper source data. >> + */ >> +static void >> +setup_pixel_interpolater_instruction(fs_visitor *v, >> + nir_intrinsic_instr *instr, >> + fs_inst *inst, >> + int mlen = 1) >> +{ >> + inst->mlen = mlen; >> + /* 2 floats per slot returned */ >> + inst->regs_written = 2 * v->dispatch_width / 8; >> + inst->pi_noperspective = instr->variables[0]->var->data.interpolation == >> + INTERP_QUALIFIER_NOPERSPECTIVE; >> +} >> + >> +static fs_reg >> +get_num_samples_reg(fs_visitor *v) >> +{ >> + struct gl_program_parameter_list *params = v->prog->Parameters; >> + static gl_state_index tokens[STATE_LENGTH] = { > > I suspect this isn't thread-safe. > >> + STATE_NUM_SAMPLES >> + }; >> + GLuint index = _mesa_add_state_reference(params, tokens); >> + unsigned i; >> + >> + /* Try to find an existing copy of the uniform */ >> + for (i = 0; i < v->uniforms; i++) { >> + if (v->stage_prog_data->param[i] == >> + &v->prog->Parameters->ParameterValues[index][0]) >> + goto found; >> + } >> + >> + v->stage_prog_data->param[v->uniforms++] = >> + &v->prog->Parameters->ParameterValues[index][0]; >> + >> +found: >> + return retype(fs_reg(UNIFORM, i), BRW_REGISTER_TYPE_UD); >> +} >> + >> void >> fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr >> *instr) >> { >> @@ -1438,27 +1479,73 @@ fs_visitor::nir_emit_intrinsic(const fs_builder >> &bld, nir_intrinsic_instr *instr >> >> fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2); >> >> - /* For most messages, we need one reg of ignored data; the hardware >> - * requires mlen==1 even when there is no payload. in the per-slot >> - * offset case, we'll replace this with the proper source data. >> - */ >> fs_reg src = vgrf(glsl_type::float_type); >> - int mlen = 1; /* one reg unless overriden */ >> fs_inst *inst; >> >> switch (instr->intrinsic) { >> case nir_intrinsic_interp_var_at_centroid: >> inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, >> dst_xy, src, fs_reg(0u)); >> + setup_pixel_interpolater_instruction(this, instr, inst); >> break; >> >> case nir_intrinsic_interp_var_at_sample: { >> - /* XXX: We should probably handle non-constant sample id's */ >> nir_const_value *const_sample = >> nir_src_as_const_value(instr->src[0]); >> - assert(const_sample); >> - unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0; >> - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, >> - fs_reg(msg_data)); >> + >> + if (const_sample) { >> + unsigned msg_data = const_sample->i[0] << 4; >> + >> + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, >> + fs_reg(msg_data)); >> + >> + setup_pixel_interpolater_instruction(this, instr, inst); >> + } else { >> + fs_reg sample_src = retype(get_nir_src(instr->src[0]), >> + BRW_REGISTER_TYPE_UD); >> + fs_reg sample_id_reg; >> + >> + if (nir_src_is_dynamically_uniform(instr->src[0])) { >> + sample_id_reg = vgrf(glsl_type::uint_type); >> + bld.SHL(sample_id_reg, sample_src, fs_reg(4u)); >> + sample_id_reg = bld.emit_uniformize(sample_id_reg); >> + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, >> + sample_id_reg); >> + setup_pixel_interpolater_instruction(this, instr, inst); >> + } else { >> + /* Make a loop that sends a message to the pixel interpolator >> + * for each possible sample number so that each individual >> + * message will be dynamically uniform. The number of samples >> + * is determined by accessing the STATE_NUM_SAMPLES state >> var. >> + */ >> + fs_reg i_reg = vgrf(glsl_type::uint_type); >> + fs_reg sample_id_reg = vgrf(glsl_type::uint_type); >> + fs_reg num_samples_reg = get_num_samples_reg(this); >> + >> + bld.MOV(i_reg, fs_reg(0u)); >> + >> + bld.emit(BRW_OPCODE_DO); >> + >> + bld.CMP(bld.null_reg_ud(), >> + sample_src, i_reg, >> + BRW_CONDITIONAL_EQ); > > I think you might be able to put all of this on one line. > >> + bld.IF(BRW_PREDICATE_NORMAL); >> + bld.SHL(sample_id_reg, i_reg, fs_reg(4u)); >> + sample_id_reg = bld.emit_uniformize(sample_id_reg); >> + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, >> + sample_id_reg); >> + setup_pixel_interpolater_instruction(this, instr, inst); >> + bld.emit(BRW_OPCODE_ENDIF); >> + >> + bld.ADD(i_reg, i_reg, fs_reg(1u)); >> + bld.CMP(bld.null_reg_ud(), >> + i_reg, num_samples_reg, >> + BRW_CONDITIONAL_GE); > > I think you might be able to put all of this on one line. > > If you want, you could reverse the order of the loop (that is, iterate > from num_samples_reg down to 0) and then save the > opt_cmod_propagation() pass some work and just emit an ADD with a a > conditional_mod in order to get rid of the CMP instruction. > >> + inst = bld.emit(BRW_OPCODE_BREAK); >> + inst->predicate = BRW_PREDICATE_NORMAL; >> + bld.emit(BRW_OPCODE_WHILE); > > You can actually get rid of the BREAK by predicating the WHILE (and > setting predicate_inverse on it). There's also a nice > set_predicate(predicate, inst) function you should use. > > If you want to do that, I think it'll take a small amount of work in > the BRW_OPCODE_WHILE case in brw_cfg.cpp to understand that a > predicated WHILE has two successors. > >> + } >> + } >> + >> break; >> } >> >> @@ -1471,6 +1558,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, >> nir_intrinsic_instr *instr >> >> inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, >> src, >> fs_reg(off_x | (off_y << 4))); >> + setup_pixel_interpolater_instruction(this, instr, inst); >> } else { >> src = vgrf(glsl_type::ivec2_type); >> fs_reg offset_src = retype(get_nir_src(instr->src[0]), >> @@ -1500,9 +1588,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, >> nir_intrinsic_instr *instr >> bld.SEL(offset(src, bld, i), itemp, fs_reg(7))); >> } >> >> - mlen = 2 * dispatch_width / 8; >> inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, >> dst_xy, src, >> fs_reg(0u)); >> + setup_pixel_interpolater_instruction(this, >> + instr, >> + inst, >> + 2 * dispatch_width / 8); >> } >> break; >> } >> @@ -1511,12 +1602,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, >> nir_intrinsic_instr *instr >> unreachable("Invalid intrinsic"); >> } >> >> - inst->mlen = mlen; >> - /* 2 floats per slot returned */ >> - inst->regs_written = 2 * dispatch_width / 8; >> - inst->pi_noperspective = instr->variables[0]->var->data.interpolation >> == >> - INTERP_QUALIFIER_NOPERSPECTIVE; >> - >> for (unsigned j = 0; j < instr->num_components; j++) { >> fs_reg src = interp_reg(instr->variables[0]->var->data.location, >> j); >> src.type = dest.type; >> diff --git a/src/mesa/drivers/dri/i965/brw_program.c >> b/src/mesa/drivers/dri/i965/brw_program.c >> index 467a893..6430c5d 100644 >> --- a/src/mesa/drivers/dri/i965/brw_program.c >> +++ b/src/mesa/drivers/dri/i965/brw_program.c >> @@ -146,6 +146,8 @@ brwProgramStringNotify(struct gl_context *ctx, >> prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, >> true); >> } >> >> + brw_add_interpolate_at_sample_params(prog); >> + >> brw_fs_precompile(ctx, NULL, prog); >> break; >> } >> @@ -246,6 +248,58 @@ brw_add_texrect_params(struct gl_program *prog) >> } >> } >> >> +static bool >> +find_interpolate_at_sample_in_block(nir_block *block, >> + void *data) >> +{ >> + nir_foreach_instr_safe(block, instr) { > > I don't think you need _safe here. At least I can't see why it's necessary. > >> + if (instr->type != nir_instr_type_intrinsic) >> + continue; >> + >> + nir_intrinsic_instr *intrinsic_instr = nir_instr_as_intrinsic(instr); >> + >> + if (intrinsic_instr->intrinsic != nir_intrinsic_interp_var_at_sample) >> + continue; >> + >> + /* If the sample number is known to be dynamically uniform then >> + * the generator won't need the num_samples state. >> + */ >> + if (nir_src_is_dynamically_uniform(intrinsic_instr->src[0])) >> + continue; >> + >> + return false; >> + } >> + >> + return true; >> +} >> + >> +void >> +brw_add_interpolate_at_sample_params(struct gl_program *prog) >> +{ >> + static gl_state_index tokens[STATE_LENGTH] = { > > I suspect this isn't thread-safe. > >> + STATE_NUM_SAMPLES >> + };
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev