Am 01.02.2016 um 16:36 schrieb Brian Paul: > Just a few more nitpicks below... > > Reviewed-by: Brian Paul <bri...@vmware.com> > > On 01/30/2016 07:08 PM, srol...@vmware.com wrote: >> From: Roland Scheidegger <srol...@vmware.com> >> >> Add support for these opcodes, the conversion functions were already >> there albeit need some new packing stuff. >> Just like the tgsi version, piglit won't like it for all the same >> reasons, so it's disabled (UP2H passes piglit arb_shader_language_packing >> tests, albeit since PK2H won't due those rounding differences I don't >> know if that one works or not as the piglit test is rather difficult to >> deal with). >> --- >> src/gallium/auxiliary/gallivm/lp_bld_conv.c | 16 ++++- >> src/gallium/auxiliary/gallivm/lp_bld_pack.c | 26 ++++++++ >> src/gallium/auxiliary/gallivm/lp_bld_pack.h | 5 ++ >> src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 1 - >> src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 73 >> ++++++++++++++++++++++ >> 5 files changed, 119 insertions(+), 2 deletions(-) >> >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c >> b/src/gallium/auxiliary/gallivm/lp_bld_conv.c >> index 7854142..7cf0dee 100644 >> --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c >> @@ -130,6 +130,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm, >> * >> * Convert float32 to half floats, preserving Infs and NaNs, >> * with rounding towards zero (trunc). >> + * XXX: For GL, would prefer rounding towards nearest(-even). >> */ >> LLVMValueRef >> lp_build_float_to_half(struct gallivm_state *gallivm, >> @@ -143,6 +144,15 @@ lp_build_float_to_half(struct gallivm_state >> *gallivm, >> struct lp_type i16_type = lp_type_int_vec(16, 16 * length); >> LLVMValueRef result; >> >> + /* >> + * Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits >> + * directly, without any (x86 or generic) intrinsics. 
>> + * Albeit the rounding mode cannot be specified (and is undefined, >> + * though in practice on x86 seems to do nearest-even but it may >> + * be dependent on instruction set support), so is essentially >> + * useless. >> + */ >> + >> if (util_cpu_caps.has_f16c && >> (length == 4 || length == 8)) { >> struct lp_type i168_type = lp_type_int_vec(16, 16 * 8); >> @@ -187,7 +197,11 @@ lp_build_float_to_half(struct gallivm_state >> *gallivm, >> LLVMValueRef index = LLVMConstInt(i32t, i, 0); >> LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, >> index, ""); >> #if 0 >> - /* XXX: not really supported by backends */ >> + /* >> + * XXX: not really supported by backends. >> + * Even if they would now, rounding mode cannot be specified and >> + * is undefined. >> + */ >> LLVMValueRef f16 = lp_build_intrinsic_unary(builder, >> "llvm.convert.to.fp16", i16t, f32); >> #else >> LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, ""); >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c >> b/src/gallium/auxiliary/gallivm/lp_bld_pack.c >> index 0b0f7f0..daa2043 100644 >> --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c >> @@ -257,6 +257,32 @@ lp_build_concat_n(struct gallivm_state *gallivm, >> >> >> /** >> + * Un-interleave vector. >> + * This will return a vector consisting of every second element >> + * (depending on lo_hi, beginning at 0 or 1). >> + * The returned vector size (elems and width) will only be half >> + * that of the source vector. >> + */ >> +LLVMValueRef >> +lp_build_uninterleave1(struct gallivm_state *gallivm, >> + unsigned num_elems, >> + LLVMValueRef a, >> + unsigned lo_hi) >> +{ >> + LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH]; >> + unsigned i; >> + assert(num_elems <= LP_MAX_VECTOR_LENGTH); >> + >> + for(i = 0; i < num_elems / 2; ++i) > > space after for. Fixed.
> >> + elems[i] = lp_build_const_int32(gallivm, 2*i + lo_hi); >> + >> + shuffle = LLVMConstVector(elems, num_elems / 2); >> + >> + return LLVMBuildShuffleVector(gallivm->builder, a, a, shuffle, ""); >> +} >> + >> + >> +/** >> * Interleave vector elements. >> * >> * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h >> b/src/gallium/auxiliary/gallivm/lp_bld_pack.h >> index 7cede35..367fba1 100644 >> --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h >> @@ -58,6 +58,11 @@ lp_build_interleave2(struct gallivm_state *gallivm, >> LLVMValueRef b, >> unsigned lo_hi); >> >> +LLVMValueRef >> +lp_build_uninterleave1(struct gallivm_state *gallivm, >> + unsigned num_elems, >> + LLVMValueRef a, >> + unsigned lo_hi); >> >> void >> lp_build_unpack2(struct gallivm_state *gallivm, >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c >> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c >> index c88dfbf..1cbe47c 100644 >> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c >> @@ -248,7 +248,6 @@ lp_build_tgsi_inst_llvm( >> /* Ignore deprecated instructions */ >> switch (inst->Instruction.Opcode) { >> >> - case TGSI_OPCODE_UP2H: >> case TGSI_OPCODE_UP2US: >> case TGSI_OPCODE_UP4B: >> case TGSI_OPCODE_UP4UB: >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c >> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c >> index 6f75bec..f6b42ee 100644 >> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c >> @@ -45,8 +45,10 @@ >> #include "lp_bld_arit.h" >> #include "lp_bld_bitarit.h" >> #include "lp_bld_const.h" >> +#include "lp_bld_conv.h" >> #include "lp_bld_gather.h" >> #include "lp_bld_logic.h" >> +#include "lp_bld_pack.h" >> >> #include "tgsi/tgsi_exec.h" >> >> @@ -530,6 +532,75 @@ static struct lp_build_tgsi_action log_action = { >> log_emit 
/* emit */ >> }; >> >> +/* TGSI_OPCODE_PK2H */ >> + >> +static void >> +pk2h_fetch_args( >> + struct lp_build_tgsi_context * bld_base, >> + struct lp_build_emit_data * emit_data) > > pk2h_fetch_args(struct lp_build_tgsi_context *bld_base, > struct lp_build_emit_data *emit_data) I didn't notice before, but there are over 100 functions in that file and every single one does it like this. The only functions not honoring that style are the new pk2h_emit and up2h_emit... So I suppose it would be better to make them wrong too for some consistency; I'm not sure it's worth doing a mass-whitespace fix... Roland >> +{ >> + /* src0.x */ >> + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, >> + 0, TGSI_CHAN_X); >> + /* src0.y */ >> + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, >> + 0, TGSI_CHAN_Y); >> +} >> + >> +static void >> +pk2h_emit(const struct lp_build_tgsi_action *action, >> + struct lp_build_tgsi_context *bld_base, >> + struct lp_build_emit_data *emit_data) >> +{ >> + struct gallivm_state *gallivm = bld_base->base.gallivm; >> + struct lp_type f16i_t; >> + LLVMValueRef lo, hi, res; >> + >> + f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32); >> + lo = lp_build_float_to_half(gallivm, emit_data->args[0]); >> + hi = lp_build_float_to_half(gallivm, emit_data->args[1]); >> + /* maybe some interleave doubling vector width would be useful... 
*/ >> + lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * >> 2); >> + hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * >> 2); >> + res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0); >> + >> + emit_data->output[emit_data->chan] = res; >> +} >> + >> +static struct lp_build_tgsi_action pk2h_action = { >> + pk2h_fetch_args, /* fetch_args */ >> + pk2h_emit /* emit */ >> +}; >> + >> +/* TGSI_OPCODE_UP2H */ >> + >> +static void >> +up2h_emit(const struct lp_build_tgsi_action *action, >> + struct lp_build_tgsi_context *bld_base, >> + struct lp_build_emit_data *emit_data) >> +{ >> + struct gallivm_state *gallivm = bld_base->base.gallivm; >> + LLVMBuilderRef builder = gallivm->builder; >> + LLVMContextRef context = gallivm->context; >> + LLVMValueRef lo, hi, res[2], arg; >> + unsigned nr = bld_base->base.type.length; >> + LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), >> nr * 2); >> + >> + arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, ""); >> + lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0); >> + hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1); >> + res[0] = lp_build_half_to_float(gallivm, lo); >> + res[1] = lp_build_half_to_float(gallivm, hi); >> + >> + emit_data->output[0] = emit_data->output[2] = res[0]; >> + emit_data->output[1] = emit_data->output[3] = res[1]; >> +} >> + >> +static struct lp_build_tgsi_action up2h_action = { >> + scalar_unary_fetch_args, /* fetch_args */ >> + up2h_emit /* emit */ >> +}; >> + >> /* TGSI_OPCODE_LRP */ >> >> static void >> @@ -1032,10 +1103,12 @@ lp_set_default_actions(struct >> lp_build_tgsi_context * bld_base) >> bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action; >> bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action; >> bld_base->op_actions[TGSI_OPCODE_LOG] = log_action; >> + bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action; >> bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action; >> bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action; >> 
bld_base->op_actions[TGSI_OPCODE_POW] = pow_action; >> bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action; >> + bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action; >> bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action; >> >> bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = >> scalar_unary_fetch_args; >> > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev