On Tue, Jan 6, 2015 at 1:04 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > > > On Sun, Jan 4, 2015 at 9:15 PM, Connor Abbott <cwabbo...@gmail.com> wrote: >> >> This is a general question for the interpolation support: >> >> Why are we using the variable-based intrinsics directly, instead of >> lowering it to something index-based in the lower_io pass just like we do >> for normal inputs? > > > I knew you were going to ask that question. The short answer: It's much > easier this way in the backend. In the future, we will probably want to do > that cleanup. However, I tried that several ways and it was a mess no > matter how I did it. So I gave up and just used the variables. > > --Jason
And the inevitable follow-up... what's the long answer? What problems did you run into? At the very least, you should document the problems you ran into as a comment somewhere so if someone notices this, they know why we had to do this hack and don't waste time rediscovering the problems. > >> >> On Tue, Dec 16, 2014 at 1:12 AM, Jason Ekstrand <ja...@jlekstrand.net> >> wrote: >>> >>> --- >>> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 102 >>> +++++++++++++++++++++++++++++++ >>> 1 file changed, 102 insertions(+) >>> >>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >>> index 46d855d..67714ec 100644 >>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >>> @@ -1410,6 +1410,108 @@ >>> fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) >>> break; >>> } >>> >>> + case nir_intrinsic_interp_var_at_centroid: >>> + case nir_intrinsic_interp_var_at_sample: >>> + case nir_intrinsic_interp_var_at_offset: { >>> + /* in SIMD16 mode, the pixel interpolator returns coords >>> interleaved >>> + * 8 channels at a time, same as the barycentric coords presented >>> in >>> + * the FS payload. this requires a bit of extra work to support. >>> + */ >>> + no16("interpolate_at_* not yet supported in SIMD16 mode."); >>> + >>> + fs_reg dst_x(GRF, virtual_grf_alloc(2), BRW_REGISTER_TYPE_F); >>> + fs_reg dst_y = offset(dst_x, 1); >>> + >>> + /* For most messages, we need one reg of ignored data; the >>> hardware >>> + * requires mlen==1 even when there is no payload. in the per-slot >>> + * offset case, we'll replace this with the proper source data. >>> + */ >>> + fs_reg src(this, glsl_type::float_type); >>> + int mlen = 1; /* one reg unless overriden */ >>> + fs_inst *inst; >>> + >>> + switch (instr->intrinsic) { >>> + case nir_intrinsic_interp_var_at_centroid: >>> + inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, >>> fs_reg(0u)); >>> + break; >>> + >>> + case nir_intrinsic_interp_var_at_sample: { >>> + /* XXX: We should probably handle non-constant sample id's */ >>> + nir_const_value *const_sample = >>> nir_src_as_const_value(instr->src[0]); >>> + assert(const_sample); >>> + unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0; >>> + inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, >>> + fs_reg(msg_data)); >>> + break; >>> + } >>> + >>> + case nir_intrinsic_interp_var_at_offset: { >>> + nir_const_value *const_offset = >>> nir_src_as_const_value(instr->src[0]); >>> + >>> + if (const_offset) { >>> + unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & >>> 0xf; >>> + unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & >>> 0xf; >>> + >>> + inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, >>> src, >>> + fs_reg(off_x | (off_y << 4))); >>> + } else { >>> + src = fs_reg(this, glsl_type::ivec2_type); >>> + fs_reg offset_src = retype(get_nir_src(instr->src[0]), >>> + BRW_REGISTER_TYPE_F); >>> + for (int i = 0; i < 2; i++) { >>> + fs_reg temp(this, glsl_type::float_type); >>> + emit(MUL(temp, offset(offset_src, i), fs_reg(16.0f))); >>> + fs_reg itemp(this, glsl_type::int_type); >>> + emit(MOV(itemp, temp)); /* float to int */ >>> + >>> + /* Clamp the upper end of the range to +7/16. >>> + * ARB_gpu_shader5 requires that we support a maximum >>> offset >>> + * of +0.5, which isn't representable in a S0.4 value -- >>> if >>> + * we didn't clamp it, we'd end up with -8/16, which is >>> the >>> + * opposite of what the shader author wanted. >>> + * >>> + * This is legal due to ARB_gpu_shader5's quantization >>> + * rules: >>> + * >>> + * "Not all values of <offset> may be supported; x and y >>> + * offsets may be rounded to fixed-point values with the >>> + * number of fraction bits given by the >>> + * implementation-dependent constant >>> + * FRAGMENT_INTERPOLATION_OFFSET_BITS" >>> + */ >>> + >>> + emit(BRW_OPCODE_SEL, offset(src, i), itemp, fs_reg(7)) >>> + ->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, >>> 7) */ >>> + } >>> + >>> + mlen = 2; >>> + inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, >>> src, >>> + fs_reg(0u)); >>> + } >>> + break; >>> + } >>> + >>> + default: >>> + unreachable("Invalid intrinsic"); >>> + } >>> + >>> + inst->mlen = mlen; >>> + inst->regs_written = 2; /* 2 floats per slot returned */ >>> + inst->pi_noperspective = >>> instr->variables[0]->var->data.interpolation == >>> + INTERP_QUALIFIER_NOPERSPECTIVE; >>> + >>> + for (unsigned j = 0; j < instr->num_components; j++) { >>> + fs_reg src = >>> interp_reg(instr->variables[0]->var->data.location, j); >>> + src.type = dest.type; >>> + >>> + fs_inst *inst = emit(FS_OPCODE_LINTERP, dest, dst_x, dst_y, >>> src); >>> + if (instr->has_predicate) >>> + inst->predicate = BRW_PREDICATE_NORMAL; >>> + dest.reg_offset++; >>> + } >>> + break; >>> + } >>> + >>> case nir_intrinsic_store_output_indirect: >>> has_indirect = true; >>> case nir_intrinsic_store_output: { >>> -- >>> 2.2.0 >>> >>> _______________________________________________ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev >> >> > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev