Re: [Mesa-dev] [PATCH 1/4] tgsi/lowering: add support for lowering FLR and CEIL

2016-04-14 Thread Christian Gmeiner
2016-04-14 0:42 GMT+02:00 Rob Clark :
> From: Russell King 
>
> Add support for lowering FLR and CEIL to FRC/SUB and FRC/ADD
> instructions for GPUs that support FRC but not FLR or CEIL.  Since
> these lowerings use FRC, it is invalid to ask for FLR or CEIL to be lowered
> along with FRC, so add an assert to catch this invalid configuration.
>
> We also need to deal with FLR instructions emitted by the lowering
> code.  Fix these up with the FRC+SUB equivalent when FLR lowering is
> enabled.
>
> Signed-off-by: Russell King 
> Reviewed-by: Rob Clark 
> ---
>  src/gallium/auxiliary/tgsi/tgsi_lowering.c | 167 
> +
>  src/gallium/auxiliary/tgsi/tgsi_lowering.h |   2 +
>  2 files changed, 149 insertions(+), 20 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
> index 0ffd855..b2dd37e 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
> @@ -676,14 +676,19 @@ transform_lit(struct tgsi_transform_context *tctx,
>   *  dst.w = 1.0
>   *
>   * ; needs: 1 tmp, imm{1.0}
> - * FLR tmpA.x, src.x
> + * if (lowering FLR) {
> + *   FRC tmpA.x, src.x
> + *   SUB tmpA.x, src.x, tmpA.x
> + * } else {
> + *   FLR tmpA.x, src.x
> + * }
>   * EX2 tmpA.y, src.x
>   * SUB dst.y, src.x, tmpA.x
>   * EX2 dst.x, tmpA.x
>   * MOV dst.z, tmpA.y
>   * MOV dst.w, imm{1.0}
>   */
> -#define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
> +#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
> NINST(1)+ NINST(1) - OINST(1))
>  #define EXP_TMP  1
>  static void
> @@ -696,14 +701,35 @@ transform_exp(struct tgsi_transform_context *tctx,
> struct tgsi_full_instruction new_inst;
>
> if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
> -  /* FLR tmpA.x, src.x */
> -  new_inst = tgsi_default_full_instruction();
> -  new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
> -  new_inst.Instruction.NumDstRegs = 1;
> -  reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
> -  new_inst.Instruction.NumSrcRegs = 1;
> -  reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
> -  tctx->emit_instruction(tctx, &new_inst);
> +  if (ctx->config->lower_FLR) {
> + /* FRC tmpA.x, src.x */
> + new_inst = tgsi_default_full_instruction();
> + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
> + new_inst.Instruction.NumDstRegs = 1;
> + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
> + new_inst.Instruction.NumSrcRegs = 1;
> + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
> + tctx->emit_instruction(tctx, &new_inst);
> +
> + /* SUB tmpA.x, src.x, tmpA.x */
> + new_inst = tgsi_default_full_instruction();
> + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
> + new_inst.Instruction.NumDstRegs = 1;
> + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
> + new_inst.Instruction.NumSrcRegs = 2;
> + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
> + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
> + tctx->emit_instruction(tctx, &new_inst);
> + } else {
> + /* FLR tmpA.x, src.x */
> + new_inst = tgsi_default_full_instruction();
> + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
> + new_inst.Instruction.NumDstRegs = 1;
> + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
> + new_inst.Instruction.NumSrcRegs = 1;
> + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
> + tctx->emit_instruction(tctx, &new_inst);
> +  }
> }
>
> if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
> @@ -771,14 +797,19 @@ transform_exp(struct tgsi_transform_context *tctx,
>   *
>   * ; needs: 1 tmp, imm{1.0}
>   * LG2 tmpA.x, |src.x|
> - * FLR tmpA.y, tmpA.x
> + * if (lowering FLR) {
> + *   FRC tmpA.y, tmpA.x
> + *   SUB tmpA.y, tmpA.x, tmpA.y
> + * } else {
> + *   FLR tmpA.y, tmpA.x
> + * }
>   * EX2 tmpA.z, tmpA.y
>   * RCP tmpA.z, tmpA.z
>   * MUL dst.y, |src.x|, tmpA.z
>   * MOV dst.xz, tmpA.yx
>   * MOV dst.w, imm{1.0}
>   */
> -#define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \
> +#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
> NINST(2) + NINST(1) + NINST(1) - OINST(1))
>  #define LOG_TMP  1
>  static void
> @@ -803,14 +834,35 @@ transform_log(struct tgsi_transform_context *tctx,
> }
>
> if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
> -  /* FLR tmpA.y, tmpA.x */
> -  new_inst = tgsi_default_full_instruction();
> -  new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
> -  new_inst.Instruction.NumDstRegs = 1;
> -  reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
> -  new_inst.Instruction.NumSrcRegs = 1;
> -  reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
> -  

[Mesa-dev] [PATCH 1/4] tgsi/lowering: add support for lowering FLR and CEIL

2016-04-13 Thread Rob Clark
From: Russell King 

Add support for lowering FLR and CEIL to FRC/SUB and FRC/ADD
instructions for GPUs that support FRC but not FLR or CEIL.  Since
these lowerings use FRC, it is invalid to ask for FLR or CEIL to be lowered
along with FRC, so add an assert to catch this invalid configuration.

We also need to deal with FLR instructions emitted by the lowering
code.  Fix these up with the FRC+SUB equivalent when FLR lowering is
enabled.

Signed-off-by: Russell King 
Reviewed-by: Rob Clark 
---
 src/gallium/auxiliary/tgsi/tgsi_lowering.c | 167 +
 src/gallium/auxiliary/tgsi/tgsi_lowering.h |   2 +
 2 files changed, 149 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
index 0ffd855..b2dd37e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
@@ -676,14 +676,19 @@ transform_lit(struct tgsi_transform_context *tctx,
  *  dst.w = 1.0
  *
  * ; needs: 1 tmp, imm{1.0}
- * FLR tmpA.x, src.x
+ * if (lowering FLR) {
+ *   FRC tmpA.x, src.x
+ *   SUB tmpA.x, src.x, tmpA.x
+ * } else {
+ *   FLR tmpA.x, src.x
+ * }
  * EX2 tmpA.y, src.x
  * SUB dst.y, src.x, tmpA.x
  * EX2 dst.x, tmpA.x
  * MOV dst.z, tmpA.y
  * MOV dst.w, imm{1.0}
  */
-#define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
+#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
NINST(1)+ NINST(1) - OINST(1))
 #define EXP_TMP  1
 static void
@@ -696,14 +701,35 @@ transform_exp(struct tgsi_transform_context *tctx,
struct tgsi_full_instruction new_inst;
 
if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
-  /* FLR tmpA.x, src.x */
-  new_inst = tgsi_default_full_instruction();
-  new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
-  new_inst.Instruction.NumDstRegs = 1;
-  reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
-  new_inst.Instruction.NumSrcRegs = 1;
-  reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
-  tctx->emit_instruction(tctx, &new_inst);
+  if (ctx->config->lower_FLR) {
+ /* FRC tmpA.x, src.x */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
+ new_inst.Instruction.NumSrcRegs = 1;
+ reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
+ tctx->emit_instruction(tctx, &new_inst);
+
+ /* SUB tmpA.x, src.x, tmpA.x */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
+ new_inst.Instruction.NumSrcRegs = 2;
+ reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
+ reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
+ tctx->emit_instruction(tctx, &new_inst);
+ } else {
+ /* FLR tmpA.x, src.x */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
+ new_inst.Instruction.NumSrcRegs = 1;
+ reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
+ tctx->emit_instruction(tctx, &new_inst);
+  }
}
 
if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
@@ -771,14 +797,19 @@ transform_exp(struct tgsi_transform_context *tctx,
  *
  * ; needs: 1 tmp, imm{1.0}
  * LG2 tmpA.x, |src.x|
- * FLR tmpA.y, tmpA.x
+ * if (lowering FLR) {
+ *   FRC tmpA.y, tmpA.x
+ *   SUB tmpA.y, tmpA.x, tmpA.y
+ * } else {
+ *   FLR tmpA.y, tmpA.x
+ * }
  * EX2 tmpA.z, tmpA.y
  * RCP tmpA.z, tmpA.z
  * MUL dst.y, |src.x|, tmpA.z
  * MOV dst.xz, tmpA.yx
  * MOV dst.w, imm{1.0}
  */
-#define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \
+#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
NINST(2) + NINST(1) + NINST(1) - OINST(1))
 #define LOG_TMP  1
 static void
@@ -803,14 +834,35 @@ transform_log(struct tgsi_transform_context *tctx,
}
 
if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
-  /* FLR tmpA.y, tmpA.x */
-  new_inst = tgsi_default_full_instruction();
-  new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
-  new_inst.Instruction.NumDstRegs = 1;
-  reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
-  new_inst.Instruction.NumSrcRegs = 1;
-  reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
-  tctx->emit_instruction(tctx, &new_inst);
+  if (ctx->config->lower_FLR) {
+ /* FRC tmpA.y, tmpA.x */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
+ new_inst.Instruction.NumDstRegs = 1;
+ reg_dst(_inst.Dst[0],