From: Marek Olšák <marek.ol...@amd.com> Remove the TGSI opcode DP2A (2-component dot product and add); drivers and state trackers should use DP3 instead. --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 29 ------------ src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c | 3 -- src/gallium/auxiliary/nir/tgsi_to_nir.c | 14 ------ src/gallium/auxiliary/tgsi/tgsi_exec.c | 29 ------------ src/gallium/auxiliary/tgsi/tgsi_info.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_lowering.c | 30 +------------ src/gallium/auxiliary/tgsi/tgsi_lowering.h | 1 - src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 1 - src/gallium/auxiliary/tgsi/tgsi_util.c | 4 -- src/gallium/docs/source/tgsi.rst | 13 ------ src/gallium/drivers/etnaviv/etnaviv_compiler.c | 1 - src/gallium/drivers/r300/r300_tgsi_to_rc.c | 1 - src/gallium/drivers/r600/r600_shader.c | 6 +-- src/gallium/drivers/svga/svga_tgsi_insn.c | 1 - src/gallium/drivers/svga/svga_tgsi_vgpu10.c | 51 ---------------------- src/gallium/include/pipe/p_shader_tokens.h | 2 +- src/mesa/state_tracker/st_atifs_to_tgsi.c | 1 - 17 files changed, 6 insertions(+), 183 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index dc6568a..0319b88 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -152,48 +152,20 @@ dp2_emit( emit_data->args[3] /* src1.y */); emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1); } static struct lp_build_tgsi_action dp2_action = { dp2_fetch_args, /* fetch_args */ dp2_emit /* emit */ }; -/* TGSI_OPCODE_DP2A */ -static void -dp2a_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - dp_fetch_args(bld_base, emit_data, 2); - emit_data->args[5] = lp_build_emit_fetch(bld_base, emit_data->inst, - 2, TGSI_CHAN_X); -} - -static void -dp2a_emit( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - LLVMValueRef tmp; - tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data); - emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, - emit_data->args[5], tmp); -} - -static struct lp_build_tgsi_action dp2a_action = { - dp2a_fetch_args, /* fetch_args */ - dp2a_emit /* emit */ -}; - /* TGSI_OPCODE_DP3 */ static void dp3_fetch_args( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { dp_fetch_args(bld_base, emit_data, 3); } static void @@ -1279,21 +1251,20 @@ u642d_emit( emit_data->args[0], bld_base->dbl_bld.vec_type, ""); } void lp_set_default_actions(struct lp_build_tgsi_context * bld_base) { bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action; bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action; bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action; - bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action; bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action; bld_base->op_actions[TGSI_OPCODE_DST] = dst_action; 
bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action; bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action; bld_base->op_actions[TGSI_OPCODE_LOG] = log_action; bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action; bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action; bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action; bld_base->op_actions[TGSI_OPCODE_POW] = pow_action; bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index 92ecb43..3cc5079 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -547,23 +547,20 @@ lp_emit_instruction_aos( case TGSI_OPCODE_LRP: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2); tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0); dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); break; - case TGSI_OPCODE_DP2A: - return FALSE; - case TGSI_OPCODE_FRC: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = lp_build_floor(&bld->bld_base.base, src0); dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0); break; case TGSI_OPCODE_FLR: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_floor(&bld->bld_base.base, src0); break; diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 46238a1..3daa896 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -994,29 +994,20 @@ ttn_xpd(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) ttn_swizzle(b, src[0], Y, Z, X, X), ttn_swizzle(b, src[1], Z, X, Y, X)), nir_fmul(b, ttn_swizzle(b, src[1], Y, Z, X, X), ttn_swizzle(b, src[0], Z, X, Y, X))), TGSI_WRITEMASK_XYZ); 
ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W); } static void -ttn_dp2a(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) -{ - ttn_move_dest(b, dest, - ttn_channel(b, nir_fadd(b, nir_fdot2(b, src[0], src[1]), - src[2]), - X)); -} - -static void ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) { ttn_move_dest(b, dest, nir_fdot2(b, src[0], src[1])); } static void ttn_dp3(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) { ttn_move_dest(b, dest, nir_fdot3(b, src[0], src[1])); } @@ -1529,21 +1520,20 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_DP3] = 0, [TGSI_OPCODE_DP4] = 0, [TGSI_OPCODE_DST] = 0, [TGSI_OPCODE_MIN] = nir_op_fmin, [TGSI_OPCODE_MAX] = nir_op_fmax, [TGSI_OPCODE_SLT] = nir_op_slt, [TGSI_OPCODE_SGE] = nir_op_sge, [TGSI_OPCODE_MAD] = nir_op_ffma, [TGSI_OPCODE_LRP] = 0, [TGSI_OPCODE_SQRT] = nir_op_fsqrt, - [TGSI_OPCODE_DP2A] = 0, [TGSI_OPCODE_FRC] = nir_op_ffract, [TGSI_OPCODE_FLR] = nir_op_ffloor, [TGSI_OPCODE_ROUND] = nir_op_fround_even, [TGSI_OPCODE_EX2] = nir_op_fexp2, [TGSI_OPCODE_LG2] = nir_op_flog2, [TGSI_OPCODE_POW] = nir_op_fpow, [TGSI_OPCODE_XPD] = 0, [TGSI_OPCODE_DPH] = 0, [TGSI_OPCODE_COS] = nir_op_fcos, [TGSI_OPCODE_DDX] = nir_op_fddx, @@ -1766,24 +1756,20 @@ ttn_emit_instruction(struct ttn_compile *c) break; case TGSI_OPCODE_DP3: ttn_dp3(b, op_trans[tgsi_op], dest, src); break; case TGSI_OPCODE_DP4: ttn_dp4(b, op_trans[tgsi_op], dest, src); break; - case TGSI_OPCODE_DP2A: - ttn_dp2a(b, op_trans[tgsi_op], dest, src); - break; - case TGSI_OPCODE_DPH: ttn_dph(b, op_trans[tgsi_op], dest, src); break; case TGSI_OPCODE_UMAD: ttn_umad(b, op_trans[tgsi_op], dest, src); break; case TGSI_OPCODE_LRP: ttn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0])); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index cc3e232..34a4af6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3178,45 +3178,20 @@ exec_dp4(struct tgsi_exec_machine *mach, } for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { if (inst->Dst[0].Register.WriteMask & (1 << chan)) { store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); } } } static void -exec_dp2a(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - unsigned int chan; - union tgsi_exec_channel arg[3]; - - fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_mul(&arg[2], &arg[0], &arg[1]); - - fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); - - fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_add(&arg[0], &arg[0], &arg[1]); - - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); - } - } -} - -static void exec_dph(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { unsigned int chan; union tgsi_exec_channel arg[3]; fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); micro_mul(&arg[2], &arg[0], &arg[1]); @@ -5176,24 +5151,20 @@ exec_instruction( break; case TGSI_OPCODE_LRP: exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SQRT: exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_DP2A: - exec_dp2a(mach, inst); - break; - case TGSI_OPCODE_FRC: exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; 
case TGSI_OPCODE_FLR: exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_ROUND: exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 8450cd7..c31705a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -51,21 +51,21 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST }, { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN }, { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX_LZ", TGSI_OPCODE_TEX_LZ }, { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, - { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, + { 1, 3, 0, 0, 0, 0, 0, REPL, "", 21 }, /* removed */ { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 }, { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 }, { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC }, { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF_LZ", TGSI_OPCODE_TXF_LZ }, { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR }, { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND }, { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 }, { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 }, { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW }, { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c index c26c13b..3013a41 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c @@ 
-913,59 +913,52 @@ transform_log(struct tgsi_transform_context *tctx, * * DP3 - 3-component Dot Product * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z * * DPH - Homogeneous Dot Product * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w * * DP2 - 2-component Dot Product * dst = src0.x \times src1.x + src0.y \times src1.y * - * DP2A - 2-component Dot Product And Add - * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x - * * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar * operations, which is what you'd prefer for a ISA that is natively * scalar. Probably a native vector ISA would at least already have * DP4/DP3 instructions, but perhaps there is room for an alternative - * translation for DPH/DP2/DP2A using vector instructions. + * translation for DPH/DP2 using vector instructions. * * ; needs: 1 tmp * MUL tmpA.x, src0.x, src1.x * MAD tmpA.x, src0.y, src1.y, tmpA.x * if (DPH || DP3 || DP4) { * MAD tmpA.x, src0.z, src1.z, tmpA.x * if (DPH) { * ADD tmpA.x, src1.w, tmpA.x * } else if (DP4) { * MAD tmpA.x, src0.w, src1.w, tmpA.x * } - * } else if (DP2A) { - * ADD tmpA.x, src2.x, tmpA.x * } * ; fixup last instruction to replicate into dst */ #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2)) #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2)) #define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2)) #define DP2_GROW (NINST(2) + NINST(3) - OINST(2)) -#define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3)) #define DOTP_TMP 1 static void transform_dotp(struct tgsi_transform_context *tctx, struct tgsi_full_instruction *inst) { struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); struct tgsi_full_dst_register *dst = &inst->Dst[0]; struct tgsi_full_src_register *src0 = &inst->Src[0]; struct tgsi_full_src_register *src1 = &inst->Src[1]; - struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */ struct 
tgsi_full_instruction new_inst; unsigned opcode = inst->Instruction.Opcode; /* NOTE: any potential last instruction must replicate src on all * components (since it could be re-written to write to final dst) */ if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { /* MUL tmpA.x, src0.x, src1.x */ new_inst = tgsi_default_full_instruction(); @@ -1019,31 +1012,20 @@ transform_dotp(struct tgsi_transform_context *tctx, /* MAD tmpA.x, src0.w, src1.w, tmpA.x */ new_inst = tgsi_default_full_instruction(); new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); new_inst.Instruction.NumSrcRegs = 3; reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W)); reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W)); reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); } - } else if (opcode == TGSI_OPCODE_DP2A) { - tctx->emit_instruction(tctx, &new_inst); - - /* ADD tmpA.x, src2.x, tmpA.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); - new_inst.Instruction.NumSrcRegs = 2; - reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X)); - reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X)); } /* fixup last instruction to write to dst: */ reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); tctx->emit_instruction(tctx, &new_inst); } } /* FLR - floor, CEIL - ceil @@ -1555,25 +1537,20 @@ transform_instr(struct tgsi_transform_context *tctx, case TGSI_OPCODE_DPH: if (!ctx->config->lower_DPH) goto skip; transform_dotp(tctx, inst); break; case TGSI_OPCODE_DP2: if (!ctx->config->lower_DP2) goto skip; transform_dotp(tctx, inst); break; - case TGSI_OPCODE_DP2A: - if (!ctx->config->lower_DP2A) - goto skip; - transform_dotp(tctx, inst); - break; case TGSI_OPCODE_FLR: if (!ctx->config->lower_FLR) goto skip; transform_flr_ceil(tctx, inst); break; case 
TGSI_OPCODE_CEIL: if (!ctx->config->lower_CEIL) goto skip; transform_flr_ceil(tctx, inst); break; @@ -1650,21 +1627,20 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, OPCS(LRP) || OPCS(FRC) || OPCS(POW) || OPCS(LIT) || OPCS(EXP) || OPCS(LOG) || OPCS(DP4) || OPCS(DP3) || OPCS(DPH) || OPCS(DP2) || - OPCS(DP2A) || OPCS(FLR) || OPCS(CEIL) || OPCS(TRUNC) || OPCS(TXP) || ctx.two_side_colors || ctx.saturate)) return NULL; #if 0 /* debug */ _debug_printf("BEFORE:"); @@ -1718,24 +1694,20 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, numtmp = MAX2(numtmp, DOTP_TMP); } if (OPCS(DPH)) { newlen += DPH_GROW * OPCS(DPH); numtmp = MAX2(numtmp, DOTP_TMP); } if (OPCS(DP2)) { newlen += DP2_GROW * OPCS(DP2); numtmp = MAX2(numtmp, DOTP_TMP); } - if (OPCS(DP2A)) { - newlen += DP2A_GROW * OPCS(DP2A); - numtmp = MAX2(numtmp, DOTP_TMP); - } if (OPCS(FLR)) { newlen += FLR_GROW * OPCS(FLR); numtmp = MAX2(numtmp, FLR_TMP); } if (OPCS(CEIL)) { newlen += CEIL_GROW * OPCS(CEIL); numtmp = MAX2(numtmp, CEIL_TMP); } if (OPCS(TRUNC)) { newlen += TRUNC_GROW * OPCS(TRUNC); diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/src/gallium/auxiliary/tgsi/tgsi_lowering.h index 20e4f84..85e4b8e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.h @@ -60,21 +60,20 @@ struct tgsi_lowering_config unsigned lower_LRP:1; unsigned lower_FRC:1; unsigned lower_POW:1; unsigned lower_LIT:1; unsigned lower_EXP:1; unsigned lower_LOG:1; unsigned lower_DP4:1; unsigned lower_DP3:1; unsigned lower_DPH:1; unsigned lower_DP2:1; - unsigned lower_DP2A:1; unsigned lower_FLR:1; unsigned lower_CEIL:1; unsigned lower_TRUNC:1; /* bitmask of (1 << TGSI_TEXTURE_type): */ unsigned lower_TXP; /* To emulate certain texture wrap modes, this can be used * to saturate the specified tex coord to [0.0, 1.0]. The * bits are according to sampler #, ie. 
if, for example: diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index f244db6..e4cbdae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -49,21 +49,20 @@ OP12(DP3) OP12(DP4) OP12(DST) OP12(MIN) OP12(MAX) OP12(SLT) OP12(SGE) OP13(MAD) OP12_TEX(TEX_LZ) OP13(LRP) OP11(SQRT) -OP13(DP2A) OP11(FRC) OP12_TEX(TXF_LZ) OP11(FLR) OP11(ROUND) OP11(EX2) OP11(LG2) OP12(POW) OP12(XPD) OP12(DPH) OP11(COS) diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 932545c..fc61351 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -255,24 +255,20 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_OPCODE_SCS: read_mask = write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0; break; case TGSI_OPCODE_EXP: case TGSI_OPCODE_LOG: read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0; break; - case TGSI_OPCODE_DP2A: - read_mask = src_idx == 2 ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_XY; - break; - case TGSI_OPCODE_DP2: read_mask = TGSI_WRITEMASK_XY; break; case TGSI_OPCODE_DP3: read_mask = TGSI_WRITEMASK_XYZ; break; case TGSI_OPCODE_DP4: read_mask = TGSI_WRITEMASK_XYZW; diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 20749a1..6602162 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -278,33 +278,20 @@ Perform a * b + c with no intermediate rounding step. dst.x = src0.x \times src1.x + src2.x dst.y = src0.y \times src1.y + src2.y dst.z = src0.z \times src1.z + src2.z dst.w = src0.w \times src1.w + src2.w -.. opcode:: DP2A - 2-component Dot Product And Add - -.. 
math:: - - dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x - - dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x - - dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x - - dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x - - .. opcode:: FRC - Fraction .. math:: dst.x = src.x - \lfloor src.x\rfloor dst.y = src.y - \lfloor src.y\rfloor dst.z = src.z - \lfloor src.z\rfloor diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c b/src/gallium/drivers/etnaviv/etnaviv_compiler.c index f65a168..88f204c 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c @@ -2337,21 +2337,20 @@ etna_compile_shader(struct etna_shader_variant *v) const struct etna_specs *specs = v->shader->specs; struct tgsi_lowering_config lconfig = { .lower_SCS = specs->has_sin_cos_sqrt, .lower_FLR = !specs->has_sign_floor_ceil, .lower_CEIL = !specs->has_sign_floor_ceil, .lower_POW = true, .lower_EXP = true, .lower_LOG = true, .lower_DP2 = true, - .lower_DP2A = true, .lower_TRUNC = true, .lower_XPD = true }; c = CALLOC_STRUCT(etna_compile); if (!c) return false; memset(&c->lbl_usage, -1, sizeof(c->lbl_usage)); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index a458330..cc0ac48 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -44,21 +44,20 @@ static unsigned translate_opcode(unsigned opcode) case TGSI_OPCODE_ADD: return RC_OPCODE_ADD; case TGSI_OPCODE_DP3: return RC_OPCODE_DP3; case TGSI_OPCODE_DP4: return RC_OPCODE_DP4; case TGSI_OPCODE_DST: return RC_OPCODE_DST; case TGSI_OPCODE_MIN: return RC_OPCODE_MIN; case TGSI_OPCODE_MAX: return RC_OPCODE_MAX; case TGSI_OPCODE_SLT: return RC_OPCODE_SLT; case TGSI_OPCODE_SGE: return RC_OPCODE_SGE; case TGSI_OPCODE_MAD: return RC_OPCODE_MAD; case TGSI_OPCODE_LRP: return RC_OPCODE_LRP; - /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */ case 
TGSI_OPCODE_FRC: return RC_OPCODE_FRC; case TGSI_OPCODE_FLR: return RC_OPCODE_FLR; case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; case TGSI_OPCODE_EX2: return RC_OPCODE_EX2; case TGSI_OPCODE_LG2: return RC_OPCODE_LG2; case TGSI_OPCODE_POW: return RC_OPCODE_POW; case TGSI_OPCODE_XPD: return RC_OPCODE_XPD; case TGSI_OPCODE_DPH: return RC_OPCODE_DPH; case TGSI_OPCODE_COS: return RC_OPCODE_COS; case TGSI_OPCODE_DDX: return RC_OPCODE_DDX; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index b49ecba..d3728fb 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -9082,21 +9082,21 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, - [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, + [21] = { ALU_OP0_NOP, tgsi_unsupported}, [22] = { ALU_OP0_NOP, tgsi_unsupported}, [23] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, [25] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, @@ -9280,21 +9280,21 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_DP4] = { 
ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, - [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, + [21] = { ALU_OP0_NOP, tgsi_unsupported}, [22] = { ALU_OP0_NOP, tgsi_unsupported}, [23] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, [25] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, @@ -9503,21 +9503,21 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, - [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, + [21] = { ALU_OP0_NOP, tgsi_unsupported}, [22] = { ALU_OP0_NOP, tgsi_unsupported}, [23] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, [25] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FLR] = { 
ALU_OP1_FLOOR, tgsi_op2}, [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr}, [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr}, [TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow}, [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index fc3ec5e..dd29f74 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -37,21 +37,20 @@ static boolean emit_vs_postamble( struct svga_shader_emitter *emit ); static boolean emit_ps_postamble( struct svga_shader_emitter *emit ); static unsigned translate_opcode(uint opcode) { switch (opcode) { case TGSI_OPCODE_ADD: return SVGA3DOP_ADD; - case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; case TGSI_OPCODE_FRC: return SVGA3DOP_FRC; case TGSI_OPCODE_MAD: return SVGA3DOP_MAD; case TGSI_OPCODE_MAX: return SVGA3DOP_MAX; case TGSI_OPCODE_MIN: return SVGA3DOP_MIN; case TGSI_OPCODE_MOV: return SVGA3DOP_MOV; case TGSI_OPCODE_MUL: return SVGA3DOP_MUL; case TGSI_OPCODE_NOP: return SVGA3DOP_NOP; default: diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index d7ec48e..56afd49 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -3571,69 +3571,20 @@ emit_cmp(struct svga_shader_emitter_v10 *emit, &tmp_src, &inst->Src[1], &inst->Src[2], inst->Instruction.Saturate); free_temp_indexes(emit); return TRUE; } /** - * Emit code for TGSI_OPCODE_DP2A instruction. 
- */ -static boolean -emit_dp2a(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) -{ - /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x - * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x - * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x - * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x - * Translate into - * MAD tmp.x, s0.y, s1.y, s2.x - * MAD tmp.x, s0.x, s1.x, tmp.x - * MOV dst.xyzw, tmp.xxxx - */ - unsigned tmp = get_temp_index(emit); - struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); - struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - - struct tgsi_full_src_register tmp_src_xxxx = - scalar_src(&tmp_src, TGSI_SWIZZLE_X); - struct tgsi_full_dst_register tmp_dst_x = - writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); - - struct tgsi_full_src_register src0_xxxx = - scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); - struct tgsi_full_src_register src0_yyyy = - scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); - struct tgsi_full_src_register src1_xxxx = - scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); - struct tgsi_full_src_register src1_yyyy = - scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); - struct tgsi_full_src_register src2_xxxx = - scalar_src(&inst->Src[2], TGSI_SWIZZLE_X); - - emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy, - &src1_yyyy, &src2_xxxx, FALSE); - emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx, - &src1_xxxx, &tmp_src_xxxx, FALSE); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], - &tmp_src_xxxx, inst->Instruction.Saturate); - - free_temp_indexes(emit); - - return TRUE; -} - - -/** * Emit code for TGSI_OPCODE_DPH instruction. 
*/ static boolean emit_dph(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { /* * DP3 tmp, s0, s1 * ADD dst, tmp, s1.wwww */ @@ -5754,22 +5705,20 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, return emit_arl_uarl(emit, inst); case TGSI_OPCODE_BGNSUB: /* no-op */ return TRUE; case TGSI_OPCODE_CAL: return emit_cal(emit, inst); case TGSI_OPCODE_CMP: return emit_cmp(emit, inst); case TGSI_OPCODE_COS: return emit_sincos(emit, inst); - case TGSI_OPCODE_DP2A: - return emit_dp2a(emit, inst); case TGSI_OPCODE_DPH: return emit_dph(emit, inst); case TGSI_OPCODE_DST: return emit_dst(emit, inst); case TGSI_OPCODE_EX2: return emit_ex2(emit, inst); case TGSI_OPCODE_EXP: return emit_exp(emit, inst); case TGSI_OPCODE_IF: return emit_if(emit, inst); diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 9fd8419..5c4af89 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -351,21 +351,21 @@ struct tgsi_property_data { #define TGSI_OPCODE_DST 11 #define TGSI_OPCODE_MIN 12 #define TGSI_OPCODE_MAX 13 #define TGSI_OPCODE_SLT 14 #define TGSI_OPCODE_SGE 15 #define TGSI_OPCODE_MAD 16 #define TGSI_OPCODE_TEX_LZ 17 #define TGSI_OPCODE_LRP 18 #define TGSI_OPCODE_FMA 19 #define TGSI_OPCODE_SQRT 20 -#define TGSI_OPCODE_DP2A 21 +/* gap */ #define TGSI_OPCODE_F2U64 22 #define TGSI_OPCODE_F2I64 23 #define TGSI_OPCODE_FRC 24 #define TGSI_OPCODE_TXF_LZ 25 #define TGSI_OPCODE_FLR 26 #define TGSI_OPCODE_ROUND 27 #define TGSI_OPCODE_EX2 28 #define TGSI_OPCODE_LG2 29 #define TGSI_OPCODE_POW 30 #define TGSI_OPCODE_XPD 31 diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.c b/src/mesa/state_tracker/st_atifs_to_tgsi.c index 13e013c..2a171b5 100644 --- a/src/mesa/state_tracker/st_atifs_to_tgsi.c +++ b/src/mesa/state_tracker/st_atifs_to_tgsi.c @@ -221,21 +221,20 @@ emit_special_inst(struct st_translate *t, const struct instruction_desc *desc, src[0] = 
ureg_src(tmp[0]); src[1] = args[0]; src[2] = args[1]; ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3, 0); } else if (!strcmp(desc->name, "CND0")) { src[0] = args[2]; src[1] = args[1]; src[2] = args[0]; ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3, 0); } else if (!strcmp(desc->name, "DOT2_ADD")) { - /* note: DP2A is not implemented in most pipe drivers */ tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */ src[0] = args[0]; src[1] = args[1]; ureg_insn(t->ureg, TGSI_OPCODE_DP2, tmp, 1, src, 2, 0); src[0] = ureg_src(tmp[0]); src[1] = ureg_scalar(args[2], TGSI_SWIZZLE_Z); ureg_insn(t->ureg, TGSI_OPCODE_ADD, dst, 1, src, 2, 0); } } -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev