Re: [Mesa-dev] [PATCH 1/4] tgsi/lowering: add support for lowering FLR and CEIL
2016-04-14 0:42 GMT+02:00 Rob Clark: > From: Russell King > > Add support for lowering FLR and CEIL to FRC/SUB and FRC/ADD > instructions for GPUs that support FRC but not FLR or CEIL. Since > these use FRC, it is invalid to ask for FLR or CEIL to be lowered > along with FRC, so add an assert to catch this invalid configuration. > > We also need to deal with FLR instructions emitted by the lowering > code. Fix these up with the FRC+SUB equivalent when FLR lowering is > enabled. > > Signed-off-by: Russell King > Reviewed-by: Rob Clark > --- > src/gallium/auxiliary/tgsi/tgsi_lowering.c | 167 > + > src/gallium/auxiliary/tgsi/tgsi_lowering.h | 2 + > 2 files changed, 149 insertions(+), 20 deletions(-) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c > b/src/gallium/auxiliary/tgsi/tgsi_lowering.c > index 0ffd855..b2dd37e 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c > @@ -676,14 +676,19 @@ transform_lit(struct tgsi_transform_context *tctx, > * dst.w = 1.0 > * > * ; needs: 1 tmp, imm{1.0} > - * FLR tmpA.x, src.x > + * if (lowering FLR) { > + * FRC tmpA.x, src.x > + * SUB tmpA.x, src.x, tmpA.x > + * } else { > + * FLR tmpA.x, src.x > + * } > * EX2 tmpA.y, src.x > * SUB dst.y, src.x, tmpA.x > * EX2 dst.x, tmpA.x > * MOV dst.z, tmpA.y > * MOV dst.w, imm{1.0} > */ > -#define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \ > +#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \ > NINST(1)+ NINST(1) - OINST(1)) > #define EXP_TMP 1 > static void > @@ -696,14 +701,35 @@ transform_exp(struct tgsi_transform_context *tctx, > struct tgsi_full_instruction new_inst; > > if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { > - /* FLR tmpA.x, src.x */ > - new_inst = tgsi_default_full_instruction(); > - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; > - new_inst.Instruction.NumDstRegs = 1; > - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); > - new_inst.Instruction.NumSrcRegs = 1; 
> - reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); > - tctx->emit_instruction(tctx, &new_inst); > + if (ctx->config->lower_FLR) { > + /* FRC tmpA.x, src.x */ > + new_inst = tgsi_default_full_instruction(); > + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; > + new_inst.Instruction.NumDstRegs = 1; > + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); > + new_inst.Instruction.NumSrcRegs = 1; > + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); > + tctx->emit_instruction(tctx, &new_inst); > + > + /* SUB tmpA.x, src.x, tmpA.x */ > + new_inst = tgsi_default_full_instruction(); > + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; > + new_inst.Instruction.NumDstRegs = 1; > + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); > + new_inst.Instruction.NumSrcRegs = 2; > + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); > + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); > + tctx->emit_instruction(tctx, &new_inst); > + } else { > + /* FLR tmpA.x, src.x */ > + new_inst = tgsi_default_full_instruction(); > + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; > + new_inst.Instruction.NumDstRegs = 1; > + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); > + new_inst.Instruction.NumSrcRegs = 1; > + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); > + tctx->emit_instruction(tctx, &new_inst); > + } > } > > if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { > @@ -771,14 +797,19 @@ transform_exp(struct tgsi_transform_context *tctx, > * > * ; needs: 1 tmp, imm{1.0} > * LG2 tmpA.x, |src.x| > - * FLR tmpA.y, tmpA.x > + * if (lowering FLR) { > + * FRC tmpA.y, tmpA.x > + * SUB tmpA.y, tmpA.x, tmpA.y > + * } else { > + * FLR tmpA.y, tmpA.x > + * } > * EX2 tmpA.z, tmpA.y > * RCP tmpA.z, tmpA.z > * MUL dst.y, |src.x|, tmpA.z > * MOV dst.xz, tmpA.yx > * MOV dst.w, imm{1.0} > */ > -#define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \ > +#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \ > NINST(2) + NINST(1) + NINST(1) - OINST(1)) > #define LOG_TMP 1 > static void > @@ 
-803,14 +834,35 @@ transform_log(struct tgsi_transform_context *tctx, > } > > if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { > - /* FLR tmpA.y, tmpA.x */ > - new_inst = tgsi_default_full_instruction(); > - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; > - new_inst.Instruction.NumDstRegs = 1; > - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); > - new_inst.Instruction.NumSrcRegs = 1; > - reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); > -
[Mesa-dev] [PATCH 1/4] tgsi/lowering: add support for lowering FLR and CEIL
From: Russell King

Add support for lowering FLR and CEIL to FRC/SUB and FRC/ADD instructions for GPUs that support FRC but not FLR or CEIL. Since these use FRC, it is invalid to ask for FLR or CEIL to be lowered along with FRC, so add an assert to catch this invalid configuration. We also need to deal with FLR instructions emitted by the lowering code. Fix these up with the FRC+SUB equivalent when FLR lowering is enabled. Signed-off-by: Russell King Reviewed-by: Rob Clark --- src/gallium/auxiliary/tgsi/tgsi_lowering.c | 167 + src/gallium/auxiliary/tgsi/tgsi_lowering.h | 2 + 2 files changed, 149 insertions(+), 20 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c index 0ffd855..b2dd37e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c @@ -676,14 +676,19 @@ transform_lit(struct tgsi_transform_context *tctx, * dst.w = 1.0 * * ; needs: 1 tmp, imm{1.0} - * FLR tmpA.x, src.x + * if (lowering FLR) { + * FRC tmpA.x, src.x + * SUB tmpA.x, src.x, tmpA.x + * } else { + * FLR tmpA.x, src.x + * } * EX2 tmpA.y, src.x * SUB dst.y, src.x, tmpA.x * EX2 dst.x, tmpA.x * MOV dst.z, tmpA.y * MOV dst.w, imm{1.0} */ -#define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \ +#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \ NINST(1)+ NINST(1) - OINST(1)) #define EXP_TMP 1 static void @@ -696,14 +701,35 @@ transform_exp(struct tgsi_transform_context *tctx, struct tgsi_full_instruction new_inst; if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { - /* FLR tmpA.x, src.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); - tctx->emit_instruction(tctx, &new_inst); + if (ctx->config->lower_FLR) { + /* FRC tmpA.x, src.x */ + 
new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); + tctx->emit_instruction(tctx, &new_inst); + + /* SUB tmpA.x, src.x, tmpA.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); + new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); + tctx->emit_instruction(tctx, &new_inst); + } else { + /* FLR tmpA.x, src.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); + tctx->emit_instruction(tctx, &new_inst); + } } if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { @@ -771,14 +797,19 @@ transform_exp(struct tgsi_transform_context *tctx, * * ; needs: 1 tmp, imm{1.0} * LG2 tmpA.x, |src.x| - * FLR tmpA.y, tmpA.x + * if (lowering FLR) { + * FRC tmpA.y, tmpA.x + * SUB tmpA.y, tmpA.x, tmpA.y + * } else { + * FLR tmpA.y, tmpA.x + * } * EX2 tmpA.z, tmpA.y * RCP tmpA.z, tmpA.z * MUL dst.y, |src.x|, tmpA.z * MOV dst.xz, tmpA.yx * MOV dst.w, imm{1.0} */ -#define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \ +#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \ NINST(2) + NINST(1) + NINST(1) - OINST(1)) #define LOG_TMP 1 static void @@ -803,14 +834,35 @@ transform_log(struct tgsi_transform_context *tctx, } if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { - /* FLR tmpA.y, tmpA.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; - 
new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); - tctx->emit_instruction(tctx, &new_inst); + if (ctx->config->lower_FLR) { + /* FRC tmpA.y, tmpA.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0],