Better constraint for tcg_out_cmp, based on the comparison. We can't yet remove the fallback that loads constants into a scratch register, because of tcg_out_cmp2, but that path should not be as frequent.
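For illustration (not part of the patch): the new 'C' constraint accepts a constant only when it fits the immediate field of the comparison insn that will be emitted -- cmpi takes a signed 16-bit immediate, cmpli an unsigned 16-bit one, and equality can use either. A minimal standalone sketch of that acceptance test, with hypothetical names:

  #include <stdbool.h>
  #include <stdint.h>

  /* Hypothetical mirror of the TCG_CT_CONST_CMP logic added below. */
  typedef enum { CMP_EQNE, CMP_SIGNED, CMP_UNSIGNED } CmpClass;

  static bool cmp_const_ok(int64_t sval, CmpClass c)
  {
      uint64_t uval = sval;
      bool s16 = sval == (int16_t)sval;    /* fits cmpi  */
      bool u16 = uval == (uint16_t)uval;   /* fits cmpli */

      switch (c) {
      case CMP_EQNE:     return s16 || u16;  /* eq/ne: either insn */
      case CMP_SIGNED:   return s16;         /* lt/ge/le/gt        */
      case CMP_UNSIGNED: return u16;         /* ltu/geu/leu/gtu    */
      }
      return false;
  }

For example, cmp_const_ok(0xffff, CMP_UNSIGNED) is true (cmpli), while cmp_const_ok(0xffff, CMP_SIGNED) is false: 0xffff does not fit cmpi's signed 16-bit field, so a register is used instead.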
Reviewed-by: Philippe Mathieu-Daudé <phi...@linaro.org>
Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
 tcg/ppc/tcg-target-con-set.h |  5 ++--
 tcg/ppc/tcg-target-con-str.h |  1 +
 tcg/ppc/tcg-target.c.inc     | 48 ++++++++++++++++++++++++++++++------
 3 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
index cb47b29452..9f99bde505 100644
--- a/tcg/ppc/tcg-target-con-set.h
+++ b/tcg/ppc/tcg-target-con-set.h
@@ -11,7 +11,7 @@
  */
 C_O0_I1(r)
 C_O0_I2(r, r)
-C_O0_I2(r, ri)
+C_O0_I2(r, rC)
 C_O0_I2(v, r)
 C_O0_I3(r, r, r)
 C_O0_I3(o, m, r)
@@ -26,13 +26,14 @@ C_O1_I2(r, rI, ri)
 C_O1_I2(r, rI, rT)
 C_O1_I2(r, r, r)
 C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rC)
 C_O1_I2(r, r, rI)
 C_O1_I2(r, r, rT)
 C_O1_I2(r, r, rU)
 C_O1_I2(r, r, rZW)
 C_O1_I2(v, v, v)
 C_O1_I3(v, v, v, v)
-C_O1_I4(r, r, ri, rZ, rZ)
+C_O1_I4(r, r, rC, rZ, rZ)
 C_O1_I4(r, r, r, ri, ri)
 C_O2_I1(r, r, r)
 C_N1O1_I1(o, m, r)
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
index 20846901de..16b687216e 100644
--- a/tcg/ppc/tcg-target-con-str.h
+++ b/tcg/ppc/tcg-target-con-str.h
@@ -16,6 +16,7 @@ REGS('v', ALL_VECTOR_REGS)
  * Define constraint letters for constants:
  * CONST(letter, TCG_CT_CONST_* bit set)
  */
+CONST('C', TCG_CT_CONST_CMP)
 CONST('I', TCG_CT_CONST_S16)
 CONST('M', TCG_CT_CONST_MONE)
 CONST('T', TCG_CT_CONST_S32)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 26e0bc31d7..535ef2cbe7 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -92,11 +92,13 @@
 #define SZR  (TCG_TARGET_REG_BITS / 8)
 
 #define TCG_CT_CONST_S16  0x100
+#define TCG_CT_CONST_U16  0x200
 #define TCG_CT_CONST_S32  0x400
 #define TCG_CT_CONST_U32  0x800
 #define TCG_CT_CONST_ZERO 0x1000
 #define TCG_CT_CONST_MONE 0x2000
 #define TCG_CT_CONST_WSZ  0x4000
+#define TCG_CT_CONST_CMP  0x8000
 
 #define ALL_GENERAL_REGS  0xffffffffu
 #define ALL_VECTOR_REGS   0xffffffff00000000ull
@@ -296,9 +298,35 @@ static bool tcg_target_const_match(int64_t sval, int ct,
         sval = (int32_t)sval;
     }
 
+    if (ct & TCG_CT_CONST_CMP) {
+        switch (cond) {
+        case TCG_COND_EQ:
+        case TCG_COND_NE:
+            ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
+            break;
+        case TCG_COND_LT:
+        case TCG_COND_GE:
+        case TCG_COND_LE:
+        case TCG_COND_GT:
+            ct |= TCG_CT_CONST_S16;
+            break;
+        case TCG_COND_LTU:
+        case TCG_COND_GEU:
+        case TCG_COND_LEU:
+        case TCG_COND_GTU:
+            ct |= TCG_CT_CONST_U16;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
+
     if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
         return 1;
     }
+    if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
+        return 1;
+    }
     if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
         return 1;
     }
@@ -1682,7 +1710,10 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
 
     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
 
-    /* Simplify the comparisons below wrt CMPI. */
+    /*
+     * Simplify the comparisons below wrt CMPI.
+     * All of the tests are 16-bit, so a 32-bit sign extend always works.
+     */
     if (type == TCG_TYPE_I32) {
         arg2 = (int32_t)arg2;
     }
@@ -3991,8 +4022,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_sar_i32:
     case INDEX_op_rotl_i32:
     case INDEX_op_rotr_i32:
-    case INDEX_op_setcond_i32:
-    case INDEX_op_negsetcond_i32:
     case INDEX_op_and_i64:
     case INDEX_op_andc_i64:
     case INDEX_op_shl_i64:
@@ -4000,8 +4029,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_sar_i64:
     case INDEX_op_rotl_i64:
     case INDEX_op_rotr_i64:
-    case INDEX_op_setcond_i64:
-    case INDEX_op_negsetcond_i64:
         return C_O1_I2(r, r, ri);
 
     case INDEX_op_mul_i32:
@@ -4045,11 +4072,16 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 
     case INDEX_op_brcond_i32:
     case INDEX_op_brcond_i64:
-        return C_O0_I2(r, ri);
-
+        return C_O0_I2(r, rC);
+    case INDEX_op_setcond_i32:
+    case INDEX_op_setcond_i64:
+    case INDEX_op_negsetcond_i32:
+    case INDEX_op_negsetcond_i64:
+        return C_O1_I2(r, r, rC);
     case INDEX_op_movcond_i32:
     case INDEX_op_movcond_i64:
-        return C_O1_I4(r, r, ri, rZ, rZ);
+        return C_O1_I4(r, r, rC, rZ, rZ);
+
     case INDEX_op_deposit_i32:
     case INDEX_op_deposit_i64:
         return C_O1_I2(r, 0, rZ);
-- 
2.34.1