Re: [PATCH 09/11] tcg/riscv: Improve setcond expansion

2023-05-16 Thread Alistair Francis
On Wed, May 3, 2023 at 6:59 PM Richard Henderson wrote:
>
> Split out a helper function, tcg_out_setcond_int, which does not
> always produce the complete boolean result, but returns a set of
> flags to do so.
>
> Based on 21af16198425, the same improvement for loongarch64.
>
> Signed-off-by: Richard Henderson 

Acked-by: Alistair Francis 

Alistair

> ---
>  tcg/riscv/tcg-target.c.inc | 164 +++--
>  1 file changed, 121 insertions(+), 43 deletions(-)
>
> diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
> index 044ddfb160..84b646105c 100644
> --- a/tcg/riscv/tcg-target.c.inc
> +++ b/tcg/riscv/tcg-target.c.inc
> @@ -812,50 +812,128 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
>  tcg_out_opc_branch(s, op, arg1, arg2, 0);
>  }
>
> -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
> -TCGReg arg1, TCGReg arg2)
> +#define SETCOND_INV    TCG_TARGET_NB_REGS
> +#define SETCOND_NEZ    (SETCOND_INV << 1)
> +#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
> +
> +static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
> +   TCGReg arg1, tcg_target_long arg2, bool c2)
>  {
> +int flags = 0;
> +
>  switch (cond) {
> -case TCG_COND_EQ:
> -tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
> -tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1);
> -break;
> -case TCG_COND_NE:
> -tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
> -tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret);
> -break;
> -case TCG_COND_LT:
> -tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
> -break;
> -case TCG_COND_GE:
> -tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
> -tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -break;
> -case TCG_COND_LE:
> -tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
> -tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -break;
> -case TCG_COND_GT:
> -tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
> -break;
> -case TCG_COND_LTU:
> -tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
> -break;
> -case TCG_COND_GEU:
> -tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
> -tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -break;
> -case TCG_COND_LEU:
> -tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
> -tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -break;
> -case TCG_COND_GTU:
> -tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
> +case TCG_COND_EQ:/* -> NE  */
> +case TCG_COND_GE:/* -> LT  */
> +case TCG_COND_GEU:   /* -> LTU */
> +case TCG_COND_GT:/* -> LE  */
> +case TCG_COND_GTU:   /* -> LEU */
> +cond = tcg_invert_cond(cond);
> +flags ^= SETCOND_INV;
>  break;
>  default:
> - g_assert_not_reached();
> - break;
> - }
> +break;
> +}
> +
> +switch (cond) {
> +case TCG_COND_LE:
> +case TCG_COND_LEU:
> +/*
> + * If we have a constant input, the most efficient way to implement
> + * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
> + * We don't need to care for this for LE because the constant input
> + * is constrained to signed 12-bit, and 0x800 is representable in the
> + * temporary register.
> + */
> +if (c2) {
> +if (cond == TCG_COND_LEU) {
> +/* unsigned <= -1 is true */
> +if (arg2 == -1) {
> +tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
> +return ret;
> +}
> +cond = TCG_COND_LTU;
> +} else {
> +cond = TCG_COND_LT;
> +}
> +tcg_debug_assert(arg2 <= 0x7ff);
> +if (++arg2 == 0x800) {
> +tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
> +arg2 = TCG_REG_TMP0;
> +c2 = false;
> +}
> +} else {
> +TCGReg tmp = arg2;
> +arg2 = arg1;
> +arg1 = tmp;
> +cond = tcg_swap_cond(cond);/* LE -> GE */
> +cond = tcg_invert_cond(cond);  /* GE -> LT */
> +flags ^= SETCOND_INV;
> +}
> +break;
> +default:
> +break;
> +}
> +
> +switch (cond) {
> +case TCG_COND_NE:
> +flags |= SETCOND_NEZ;
> +if (!c2) {
> +tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
> +} else if (arg2 == 0) {
> +ret = arg1;
> +} else {
> +tcg_out_opc_reg(s, OPC_XORI, ret, arg1, arg2);
> +}
> +break;
> +
> +case TCG_COND_LT:
> +if (c2) {
> +tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
> +} else {
> +

Re: [PATCH 09/11] tcg/riscv: Improve setcond expansion

2023-05-08 Thread Daniel Henrique Barboza




On 5/3/23 05:56, Richard Henderson wrote:

Split out a helper function, tcg_out_setcond_int, which does not
always produce the complete boolean result, but returns a set of
flags to do so.

Based on 21af16198425, the same improvement for loongarch64.

Signed-off-by: Richard Henderson 
---


Reviewed-by: Daniel Henrique Barboza 


  tcg/riscv/tcg-target.c.inc | 164 +++--
  1 file changed, 121 insertions(+), 43 deletions(-)

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 044ddfb160..84b646105c 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -812,50 +812,128 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
  tcg_out_opc_branch(s, op, arg1, arg2, 0);
  }
  
-static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,

-TCGReg arg1, TCGReg arg2)
+#define SETCOND_INV    TCG_TARGET_NB_REGS
+#define SETCOND_NEZ    (SETCOND_INV << 1)
+#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
+
+static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
+   TCGReg arg1, tcg_target_long arg2, bool c2)
  {
+int flags = 0;
+
  switch (cond) {
-case TCG_COND_EQ:
-tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
-tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1);
-break;
-case TCG_COND_NE:
-tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
-tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret);
-break;
-case TCG_COND_LT:
-tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
-break;
-case TCG_COND_GE:
-tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
-tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-break;
-case TCG_COND_LE:
-tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
-tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-break;
-case TCG_COND_GT:
-tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
-break;
-case TCG_COND_LTU:
-tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
-break;
-case TCG_COND_GEU:
-tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
-tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-break;
-case TCG_COND_LEU:
-tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
-tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-break;
-case TCG_COND_GTU:
-tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
+case TCG_COND_EQ:/* -> NE  */
+case TCG_COND_GE:/* -> LT  */
+case TCG_COND_GEU:   /* -> LTU */
+case TCG_COND_GT:/* -> LE  */
+case TCG_COND_GTU:   /* -> LEU */
+cond = tcg_invert_cond(cond);
+flags ^= SETCOND_INV;
  break;
  default:
- g_assert_not_reached();
- break;
- }
+break;
+}
+
+switch (cond) {
+case TCG_COND_LE:
+case TCG_COND_LEU:
+/*
+ * If we have a constant input, the most efficient way to implement
+ * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
+ * We don't need to care for this for LE because the constant input
+ * is constrained to signed 12-bit, and 0x800 is representable in the
+ * temporary register.
+ */
+if (c2) {
+if (cond == TCG_COND_LEU) {
+/* unsigned <= -1 is true */
+if (arg2 == -1) {
+tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
+return ret;
+}
+cond = TCG_COND_LTU;
+} else {
+cond = TCG_COND_LT;
+}
+tcg_debug_assert(arg2 <= 0x7ff);
+if (++arg2 == 0x800) {
+tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
+arg2 = TCG_REG_TMP0;
+c2 = false;
+}
+} else {
+TCGReg tmp = arg2;
+arg2 = arg1;
+arg1 = tmp;
+cond = tcg_swap_cond(cond);/* LE -> GE */
+cond = tcg_invert_cond(cond);  /* GE -> LT */
+flags ^= SETCOND_INV;
+}
+break;
+default:
+break;
+}
+
+switch (cond) {
+case TCG_COND_NE:
+flags |= SETCOND_NEZ;
+if (!c2) {
+tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
+} else if (arg2 == 0) {
+ret = arg1;
+} else {
+tcg_out_opc_reg(s, OPC_XORI, ret, arg1, arg2);
+}
+break;
+
+case TCG_COND_LT:
+if (c2) {
+tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
+} else {
+tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
+}
+break;
+
+case TCG_COND_LTU:
+if (c2) {
+tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2);
+} else {
+tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
+}
+break;
+
+  

[PATCH 09/11] tcg/riscv: Improve setcond expansion

2023-05-03 Thread Richard Henderson
Split out a helper function, tcg_out_setcond_int, which does not
always produce the complete boolean result, but returns a set of
flags to do so.

Based on 21af16198425, the same improvement for loongarch64.

Signed-off-by: Richard Henderson 
---
 tcg/riscv/tcg-target.c.inc | 164 +++--
 1 file changed, 121 insertions(+), 43 deletions(-)

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 044ddfb160..84b646105c 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -812,50 +812,128 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
 tcg_out_opc_branch(s, op, arg1, arg2, 0);
 }
 
-static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
-TCGReg arg1, TCGReg arg2)
+#define SETCOND_INV    TCG_TARGET_NB_REGS
+#define SETCOND_NEZ    (SETCOND_INV << 1)
+#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
+
+static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
+   TCGReg arg1, tcg_target_long arg2, bool c2)
 {
+int flags = 0;
+
 switch (cond) {
-case TCG_COND_EQ:
-tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
-tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1);
-break;
-case TCG_COND_NE:
-tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
-tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret);
-break;
-case TCG_COND_LT:
-tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
-break;
-case TCG_COND_GE:
-tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
-tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-break;
-case TCG_COND_LE:
-tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
-tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-break;
-case TCG_COND_GT:
-tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
-break;
-case TCG_COND_LTU:
-tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
-break;
-case TCG_COND_GEU:
-tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
-tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-break;
-case TCG_COND_LEU:
-tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
-tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-break;
-case TCG_COND_GTU:
-tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
+case TCG_COND_EQ:/* -> NE  */
+case TCG_COND_GE:/* -> LT  */
+case TCG_COND_GEU:   /* -> LTU */
+case TCG_COND_GT:/* -> LE  */
+case TCG_COND_GTU:   /* -> LEU */
+cond = tcg_invert_cond(cond);
+flags ^= SETCOND_INV;
 break;
 default:
- g_assert_not_reached();
- break;
- }
+break;
+}
+
+switch (cond) {
+case TCG_COND_LE:
+case TCG_COND_LEU:
+/*
+ * If we have a constant input, the most efficient way to implement
+ * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
+ * We don't need to care for this for LE because the constant input
+ * is constrained to signed 12-bit, and 0x800 is representable in the
+ * temporary register.
+ */
+if (c2) {
+if (cond == TCG_COND_LEU) {
+/* unsigned <= -1 is true */
+if (arg2 == -1) {
+tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
+return ret;
+}
+cond = TCG_COND_LTU;
+} else {
+cond = TCG_COND_LT;
+}
+tcg_debug_assert(arg2 <= 0x7ff);
+if (++arg2 == 0x800) {
+tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
+arg2 = TCG_REG_TMP0;
+c2 = false;
+}
+} else {
+TCGReg tmp = arg2;
+arg2 = arg1;
+arg1 = tmp;
+cond = tcg_swap_cond(cond);/* LE -> GE */
+cond = tcg_invert_cond(cond);  /* GE -> LT */
+flags ^= SETCOND_INV;
+}
+break;
+default:
+break;
+}
+
+switch (cond) {
+case TCG_COND_NE:
+flags |= SETCOND_NEZ;
+if (!c2) {
+tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
+} else if (arg2 == 0) {
+ret = arg1;
+} else {
+tcg_out_opc_reg(s, OPC_XORI, ret, arg1, arg2);
+}
+break;
+
+case TCG_COND_LT:
+if (c2) {
+tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
+} else {
+tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
+}
+break;
+
+case TCG_COND_LTU:
+if (c2) {
+tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2);
+} else {
+tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
+}
+break;
+
+default:
+g_assert_not_reached();
+}
+
+return ret | flags;
+}
+
+static void