Re: [PATCH 09/11] tcg/riscv: Improve setcond expansion
On Wed, May 3, 2023 at 6:59 PM Richard Henderson wrote:
>
> Split out a helper function, tcg_out_setcond_int, which does not
> always produce the complete boolean result, but returns a set of
> flags to do so.
>
> Based on 21af16198425, the same improvement for loongarch64.
>
> Signed-off-by: Richard Henderson

Acked-by: Alistair Francis

Alistair

> ---
>  tcg/riscv/tcg-target.c.inc | 164 +++--
>  1 file changed, 121 insertions(+), 43 deletions(-)
>
> diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
> index 044ddfb160..84b646105c 100644
> --- a/tcg/riscv/tcg-target.c.inc
> +++ b/tcg/riscv/tcg-target.c.inc
> @@ -812,50 +812,128 @@ static void tcg_out_brcond(TCGContext *s, TCGCond
> cond, TCGReg arg1,
>      tcg_out_opc_branch(s, op, arg1, arg2, 0);
>  }
>
> -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
> -                            TCGReg arg1, TCGReg arg2)
> +#define SETCOND_INV    TCG_TARGET_NB_REGS
> +#define SETCOND_NEZ    (SETCOND_INV << 1)
> +#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
> +
> +static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
> +                               TCGReg arg1, tcg_target_long arg2, bool c2)
>  {
> +    int flags = 0;
> +
>      switch (cond) {
> -    case TCG_COND_EQ:
> -        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
> -        tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1);
> -        break;
> -    case TCG_COND_NE:
> -        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret);
> -        break;
> -    case TCG_COND_LT:
> -        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
> -        break;
> -    case TCG_COND_GE:
> -        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
> -        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -        break;
> -    case TCG_COND_LE:
> -        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
> -        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -        break;
> -    case TCG_COND_GT:
> -        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
> -        break;
> -    case TCG_COND_LTU:
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
> -        break;
> -    case TCG_COND_GEU:
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
> -        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -        break;
> -    case TCG_COND_LEU:
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
> -        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -        break;
> -    case TCG_COND_GTU:
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
> +    case TCG_COND_EQ:    /* -> NE */
> +    case TCG_COND_GE:    /* -> LT */
> +    case TCG_COND_GEU:   /* -> LTU */
> +    case TCG_COND_GT:    /* -> LE */
> +    case TCG_COND_GTU:   /* -> LEU */
> +        cond = tcg_invert_cond(cond);
> +        flags ^= SETCOND_INV;
>          break;
>      default:
> -         g_assert_not_reached();
> -         break;
> -     }
> +        break;
> +    }
> +
> +    switch (cond) {
> +    case TCG_COND_LE:
> +    case TCG_COND_LEU:
> +        /*
> +         * If we have a constant input, the most efficient way to implement
> +         * LE is by adding 1 and using LT. Watch out for wrap around for
> LEU.
> +         * We don't need to care for this for LE because the constant input
> +         * is constrained to signed 12-bit, and 0x800 is representable in the
> +         * temporary register.
> +         */
> +        if (c2) {
> +            if (cond == TCG_COND_LEU) {
> +                /* unsigned <= -1 is true */
> +                if (arg2 == -1) {
> +                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags &
> SETCOND_INV));
> +                    return ret;
> +                }
> +                cond = TCG_COND_LTU;
> +            } else {
> +                cond = TCG_COND_LT;
> +            }
> +            tcg_debug_assert(arg2 <= 0x7ff);
> +            if (++arg2 == 0x800) {
> +                tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
> +                arg2 = TCG_REG_TMP0;
> +                c2 = false;
> +            }
> +        } else {
> +            TCGReg tmp = arg2;
> +            arg2 = arg1;
> +            arg1 = tmp;
> +            cond = tcg_swap_cond(cond);    /* LE -> GE */
> +            cond = tcg_invert_cond(cond);  /* GE -> LT */
> +            flags ^= SETCOND_INV;
> +        }
> +        break;
> +    default:
> +        break;
> +    }
> +
> +    switch (cond) {
> +    case TCG_COND_NE:
> +        flags |= SETCOND_NEZ;
> +        if (!c2) {
> +            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
> +        } else if (arg2 == 0) {
> +            ret = arg1;
> +        } else {
> +            tcg_out_opc_reg(s, OPC_XORI, ret, arg1, arg2);
> +        }
> +        break;
> +
> +    case TCG_COND_LT:
> +        if (c2) {
> +            tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
> +        } else {
> +
Re: [PATCH 09/11] tcg/riscv: Improve setcond expansion
On 5/3/23 05:56, Richard Henderson wrote:

Split out a helper function, tcg_out_setcond_int, which does not
always produce the complete boolean result, but returns a set of
flags to do so.

Based on 21af16198425, the same improvement for loongarch64.

Signed-off-by: Richard Henderson
---

Reviewed-by: Daniel Henrique Barboza

 tcg/riscv/tcg-target.c.inc | 164 +++--
 1 file changed, 121 insertions(+), 43 deletions(-)

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 044ddfb160..84b646105c 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -812,50 +812,128 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
     tcg_out_opc_branch(s, op, arg1, arg2, 0);
 }

-static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
-                            TCGReg arg1, TCGReg arg2)
+#define SETCOND_INV    TCG_TARGET_NB_REGS
+#define SETCOND_NEZ    (SETCOND_INV << 1)
+#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
+
+static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
+                               TCGReg arg1, tcg_target_long arg2, bool c2)
 {
+    int flags = 0;
+
     switch (cond) {
-    case TCG_COND_EQ:
-        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
-        tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1);
-        break;
-    case TCG_COND_NE:
-        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
-        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret);
-        break;
-    case TCG_COND_LT:
-        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
-        break;
-    case TCG_COND_GE:
-        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
-        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-        break;
-    case TCG_COND_LE:
-        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
-        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-        break;
-    case TCG_COND_GT:
-        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
-        break;
-    case TCG_COND_LTU:
-        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
-        break;
-    case TCG_COND_GEU:
-        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
-        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-        break;
-    case TCG_COND_LEU:
-        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
-        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-        break;
-    case TCG_COND_GTU:
-        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
+    case TCG_COND_EQ:    /* -> NE */
+    case TCG_COND_GE:    /* -> LT */
+    case TCG_COND_GEU:   /* -> LTU */
+    case TCG_COND_GT:    /* -> LE */
+    case TCG_COND_GTU:   /* -> LEU */
+        cond = tcg_invert_cond(cond);
+        flags ^= SETCOND_INV;
         break;
     default:
-         g_assert_not_reached();
-         break;
-     }
+        break;
+    }
+
+    switch (cond) {
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+        /*
+         * If we have a constant input, the most efficient way to implement
+         * LE is by adding 1 and using LT. Watch out for wrap around for LEU.
+         * We don't need to care for this for LE because the constant input
+         * is constrained to signed 12-bit, and 0x800 is representable in the
+         * temporary register.
+         */
+        if (c2) {
+            if (cond == TCG_COND_LEU) {
+                /* unsigned <= -1 is true */
+                if (arg2 == -1) {
+                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
+                    return ret;
+                }
+                cond = TCG_COND_LTU;
+            } else {
+                cond = TCG_COND_LT;
+            }
+            tcg_debug_assert(arg2 <= 0x7ff);
+            if (++arg2 == 0x800) {
+                tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
+                arg2 = TCG_REG_TMP0;
+                c2 = false;
+            }
+        } else {
+            TCGReg tmp = arg2;
+            arg2 = arg1;
+            arg1 = tmp;
+            cond = tcg_swap_cond(cond);    /* LE -> GE */
+            cond = tcg_invert_cond(cond);  /* GE -> LT */
+            flags ^= SETCOND_INV;
+        }
+        break;
+    default:
+        break;
+    }
+
+    switch (cond) {
+    case TCG_COND_NE:
+        flags |= SETCOND_NEZ;
+        if (!c2) {
+            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
+        } else if (arg2 == 0) {
+            ret = arg1;
+        } else {
+            tcg_out_opc_reg(s, OPC_XORI, ret, arg1, arg2);
+        }
+        break;
+
+    case TCG_COND_LT:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
+        }
+        break;
+
+    case TCG_COND_LTU:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
+        }
+        break;
+
+
[PATCH 09/11] tcg/riscv: Improve setcond expansion
Split out a helper function, tcg_out_setcond_int, which does not
always produce the complete boolean result, but returns a set of
flags to do so.

Based on 21af16198425, the same improvement for loongarch64.

Signed-off-by: Richard Henderson
---
 tcg/riscv/tcg-target.c.inc | 164 +++--
 1 file changed, 121 insertions(+), 43 deletions(-)

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 044ddfb160..84b646105c 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -812,50 +812,128 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
     tcg_out_opc_branch(s, op, arg1, arg2, 0);
 }

-static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
-                            TCGReg arg1, TCGReg arg2)
+#define SETCOND_INV    TCG_TARGET_NB_REGS
+#define SETCOND_NEZ    (SETCOND_INV << 1)
+#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
+
+static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
+                               TCGReg arg1, tcg_target_long arg2, bool c2)
 {
+    int flags = 0;
+
     switch (cond) {
-    case TCG_COND_EQ:
-        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
-        tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1);
-        break;
-    case TCG_COND_NE:
-        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
-        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret);
-        break;
-    case TCG_COND_LT:
-        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
-        break;
-    case TCG_COND_GE:
-        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
-        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-        break;
-    case TCG_COND_LE:
-        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
-        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-        break;
-    case TCG_COND_GT:
-        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
-        break;
-    case TCG_COND_LTU:
-        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
-        break;
-    case TCG_COND_GEU:
-        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
-        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-        break;
-    case TCG_COND_LEU:
-        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
-        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-        break;
-    case TCG_COND_GTU:
-        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
+    case TCG_COND_EQ:    /* -> NE */
+    case TCG_COND_GE:    /* -> LT */
+    case TCG_COND_GEU:   /* -> LTU */
+    case TCG_COND_GT:    /* -> LE */
+    case TCG_COND_GTU:   /* -> LEU */
+        cond = tcg_invert_cond(cond);
+        flags ^= SETCOND_INV;
         break;
     default:
-         g_assert_not_reached();
-         break;
-     }
+        break;
+    }
+
+    switch (cond) {
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+        /*
+         * If we have a constant input, the most efficient way to implement
+         * LE is by adding 1 and using LT. Watch out for wrap around for LEU.
+         * We don't need to care for this for LE because the constant input
+         * is constrained to signed 12-bit, and 0x800 is representable in the
+         * temporary register.
+         */
+        if (c2) {
+            if (cond == TCG_COND_LEU) {
+                /* unsigned <= -1 is true */
+                if (arg2 == -1) {
+                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
+                    return ret;
+                }
+                cond = TCG_COND_LTU;
+            } else {
+                cond = TCG_COND_LT;
+            }
+            tcg_debug_assert(arg2 <= 0x7ff);
+            if (++arg2 == 0x800) {
+                tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
+                arg2 = TCG_REG_TMP0;
+                c2 = false;
+            }
+        } else {
+            TCGReg tmp = arg2;
+            arg2 = arg1;
+            arg1 = tmp;
+            cond = tcg_swap_cond(cond);    /* LE -> GE */
+            cond = tcg_invert_cond(cond);  /* GE -> LT */
+            flags ^= SETCOND_INV;
+        }
+        break;
+    default:
+        break;
+    }
+
+    switch (cond) {
+    case TCG_COND_NE:
+        flags |= SETCOND_NEZ;
+        if (!c2) {
+            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
+        } else if (arg2 == 0) {
+            ret = arg1;
+        } else {
+            tcg_out_opc_reg(s, OPC_XORI, ret, arg1, arg2);
+        }
+        break;
+
+    case TCG_COND_LT:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
+        }
+        break;
+
+    case TCG_COND_LTU:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
+        }
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    return ret | flags;
+}
+
+static void