Rather than allowing an arbitrary shift+truncate, support only extraction of the low and high 32-bit parts of a 64-bit value. This is all that was being used anyway.
Signed-off-by: Richard Henderson <r...@twiddle.net> --- target-tricore/translate.c | 12 ++++++------ tcg/README | 14 ++++++++++---- tcg/aarch64/tcg-target.h | 3 ++- tcg/i386/tcg-target.h | 3 ++- tcg/ia64/tcg-target.h | 3 ++- tcg/optimize.c | 22 +++++++++++----------- tcg/ppc/tcg-target.h | 3 ++- tcg/s390/tcg-target.h | 3 ++- tcg/sparc/tcg-target.c | 14 +++++++------- tcg/sparc/tcg-target.h | 3 ++- tcg/tcg-op.c | 38 +++++++++++++++++++------------------- tcg/tcg-op.h | 5 +++-- tcg/tcg-opc.h | 7 +++++-- tcg/tcg.h | 3 ++- tcg/tci/tcg-target.h | 3 ++- 15 files changed, 77 insertions(+), 59 deletions(-) diff --git a/target-tricore/translate.c b/target-tricore/translate.c index 7dc7a32..70f0930 100644 --- a/target-tricore/translate.c +++ b/target-tricore/translate.c @@ -457,11 +457,11 @@ gen_add64_d(TCGv_i64 ret, TCGv_i64 r1, TCGv_i64 r2) tcg_gen_xor_i64(t1, result, r1); tcg_gen_xor_i64(t0, r1, r2); tcg_gen_andc_i64(t1, t1, t0); - tcg_gen_trunc_shr_i64_i32(cpu_PSW_V, t1, 32); + tcg_gen_extrh_i64_i32(cpu_PSW_V, t1); /* calc SV bit */ tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* calc AV/SAV bits */ - tcg_gen_trunc_shr_i64_i32(temp, result, 32); + tcg_gen_extrh_i64_i32(temp, result); tcg_gen_add_tl(cpu_PSW_AV, temp, temp); tcg_gen_xor_tl(cpu_PSW_AV, temp, cpu_PSW_AV); /* calc SAV */ @@ -1273,7 +1273,7 @@ gen_madd64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, tcg_gen_xor_i64(t3, t4, t1); tcg_gen_xor_i64(t2, t1, t2); tcg_gen_andc_i64(t3, t3, t2); - tcg_gen_trunc_shr_i64_i32(cpu_PSW_V, t3, 32); + tcg_gen_extrh_i64_i32(cpu_PSW_V, t3); /* We produce an overflow on the host if the mul before was (0x80000000 * 0x80000000) << 1). If this is the case, we negate the ovf. 
*/ @@ -1630,11 +1630,11 @@ gen_sub64_d(TCGv_i64 ret, TCGv_i64 r1, TCGv_i64 r2) tcg_gen_xor_i64(t1, result, r1); tcg_gen_xor_i64(t0, r1, r2); tcg_gen_and_i64(t1, t1, t0); - tcg_gen_trunc_shr_i64_i32(cpu_PSW_V, t1, 32); + tcg_gen_extrh_i64_i32(cpu_PSW_V, t1); /* calc SV bit */ tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* calc AV/SAV bits */ - tcg_gen_trunc_shr_i64_i32(temp, result, 32); + tcg_gen_extrh_i64_i32(temp, result); tcg_gen_add_tl(cpu_PSW_AV, temp, temp); tcg_gen_xor_tl(cpu_PSW_AV, temp, cpu_PSW_AV); /* calc SAV */ @@ -2126,7 +2126,7 @@ gen_msub64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, tcg_gen_xor_i64(t3, t4, t1); tcg_gen_xor_i64(t2, t1, t2); tcg_gen_and_i64(t3, t3, t2); - tcg_gen_trunc_shr_i64_i32(cpu_PSW_V, t3, 32); + tcg_gen_extrh_i64_i32(cpu_PSW_V, t3); /* We produce an overflow on the host if the mul before was (0x80000000 * 0x80000000) << 1). If this is the case, we negate the ovf. */ diff --git a/tcg/README b/tcg/README index a550ff1..e3bec27 100644 --- a/tcg/README +++ b/tcg/README @@ -314,11 +314,17 @@ This operation would be equivalent to dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00) -* trunc_shr_i32 t0, t1, pos +* extrl_i64_i32 t0, t1 -For 64-bit hosts only, right shift the 64-bit input T1 by POS and -truncate to 32-bit output T0. Depending on the host, this may be -a simple mov/shift, or may require additional canonicalization. +For 64-bit hosts only, extract the low 32-bits of input T1 and place it +into 32-bit output T0. Depending on the host, this may be a simple move, +or may require additional canonicalization. + +* extrh_i64_i32 t0, t1 + +For 64-bit hosts only, extract the high 32-bits of input T1 and place it +into 32-bit output T0. Depending on the host, this may be a simple shift, +or may require additional canonicalization. 
********* Conditional moves diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 8aec04d..19a04a6 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -70,7 +70,8 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 25b5133..92be341 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -102,7 +102,8 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_ext8s_i64 1 diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index a04ed81..ae9b79f 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -160,7 +160,8 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_mulsh_i64 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) diff --git a/tcg/optimize.c b/tcg/optimize.c index 48103b2..47415a8 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -301,7 +301,6 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) case INDEX_op_shr_i32: return (uint32_t)x >> (y & 31); - case INDEX_op_trunc_shr_i32: case INDEX_op_shr_i64: return (uint64_t)x >> (y & 63); @@ -359,9 +358,13 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) case INDEX_op_ext32s_i64: return (int32_t)x; + 
case INDEX_op_extrl_i64_i32: case INDEX_op_ext32u_i64: return (uint32_t)x; + case INDEX_op_extrh_i64_i32: + return (uint64_t)x >> 32; + case INDEX_op_muluh_i32: return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; case INDEX_op_mulsh_i32: @@ -880,8 +883,11 @@ void tcg_optimize(TCGContext *s) } break; - case INDEX_op_trunc_shr_i32: - mask = (uint64_t)temps[args[1]].mask >> args[2]; + case INDEX_op_extrl_i64_i32: + mask = (uint32_t)temps[args[1]].mask; + break; + case INDEX_op_extrh_i64_i32: + mask = (uint64_t)temps[args[1]].mask >> 32; break; CASE_OP_32_64(shl): @@ -1021,6 +1027,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(ext16u): case INDEX_op_ext32s_i64: case INDEX_op_ext32u_i64: + case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: if (temp_is_const(args[1])) { tmp = do_constant_folding(opc, temps[args[1]].val, 0); tcg_opt_gen_movi(s, op, args, args[0], tmp); @@ -1028,14 +1036,6 @@ void tcg_optimize(TCGContext *s) } goto do_default; - case INDEX_op_trunc_shr_i32: - if (temp_is_const(args[1])) { - tmp = do_constant_folding(opc, temps[args[1]].val, args[2]); - tcg_opt_gen_movi(s, op, args, args[0], tmp); - break; - } - goto do_default; - CASE_OP_32_64(add): CASE_OP_32_64(sub): CASE_OP_32_64(mul): diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 7ce7048..b4f0818 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -77,7 +77,8 @@ typedef enum { #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 91576d5..d9dc038 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -72,7 +72,8 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define 
TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index 1a870a8..37eea65 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -1413,12 +1413,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext32u_i64: tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); break; - case INDEX_op_trunc_shr_i32: - if (a2 == 0) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - } else { - tcg_out_arithi(s, a0, a1, a2, SHIFT_SRLX); - } + case INDEX_op_extrl_i64_i32: + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + break; + case INDEX_op_extrh_i64_i32: + tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX); break; case INDEX_op_brcond_i64: @@ -1533,7 +1532,8 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_ext32s_i64, { "R", "r" } }, { INDEX_op_ext32u_i64, { "R", "r" } }, - { INDEX_op_trunc_shr_i32, { "r", "R" } }, + { INDEX_op_extrl_i64_i32, { "r", "R" } }, + { INDEX_op_extrh_i64_i32, { "r", "R" } }, { INDEX_op_brcond_i64, { "RZ", "RJ" } }, { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } }, diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index f584de4..2cd72d2 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -118,7 +118,8 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 1 +#define TCG_TARGET_HAS_extrl_i64_i32 1 +#define TCG_TARGET_HAS_extrh_i64_i32 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 45098c3..9d7fa89 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1737,28 +1737,28 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) /* Size changing operations. 
*/ -void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned count) +void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { - tcg_debug_assert(count < 64); if (TCG_TARGET_REG_BITS == 32) { - if (count >= 32) { - tcg_gen_shri_i32(ret, TCGV_HIGH(arg), count - 32); - } else if (count == 0) { - tcg_gen_mov_i32(ret, TCGV_LOW(arg)); - } else { - TCGv_i64 t = tcg_temp_new_i64(); - tcg_gen_shri_i64(t, arg, count); - tcg_gen_mov_i32(ret, TCGV_LOW(t)); - tcg_temp_free_i64(t); - } - } else if (TCG_TARGET_HAS_trunc_shr_i32) { - tcg_gen_op3i_i32(INDEX_op_trunc_shr_i32, ret, - MAKE_TCGV_I32(GET_TCGV_I64(arg)), count); - } else if (count == 0) { + tcg_gen_mov_i32(ret, TCGV_LOW(arg)); + } else if (TCG_TARGET_HAS_extrl_i64_i32) { + tcg_gen_op2(&tcg_ctx, INDEX_op_extrl_i64_i32, + GET_TCGV_I32(ret), GET_TCGV_I64(arg)); + } else { tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); + } +} + +void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +{ + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_mov_i32(ret, TCGV_HIGH(arg)); + } else if (TCG_TARGET_HAS_extrh_i64_i32) { + tcg_gen_op2(&tcg_ctx, INDEX_op_extrh_i64_i32, + GET_TCGV_I32(ret), GET_TCGV_I64(arg)); } else { TCGv_i64 t = tcg_temp_new_i64(); - tcg_gen_shri_i64(t, arg, count); + tcg_gen_shri_i64(t, arg, 32); tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(t))); tcg_temp_free_i64(t); } @@ -1820,8 +1820,8 @@ void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg) tcg_gen_mov_i32(lo, TCGV_LOW(arg)); tcg_gen_mov_i32(hi, TCGV_HIGH(arg)); } else { - tcg_gen_trunc_shr_i64_i32(lo, arg, 0); - tcg_gen_trunc_shr_i64_i32(hi, arg, 32); + tcg_gen_extrl_i64_i32(lo, arg); + tcg_gen_extrh_i64_i32(hi, arg); } } diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index d1d763f..6b59eed 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -684,7 +684,8 @@ static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg); void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg); void 
tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high); -void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned int c); +void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg); +void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg); void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg); void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg); @@ -695,7 +696,7 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi) static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { - tcg_gen_trunc_shr_i64_i32(ret, arg, 0); + tcg_gen_extrl_i64_i32(ret, arg); } /* QEMU specific operations. */ diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 13ccb60..59aa84f 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -138,8 +138,11 @@ DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) -DEF(trunc_shr_i32, 1, 1, 1, - IMPL(TCG_TARGET_HAS_trunc_shr_i32) +DEF(extrl_i64_i32, 1, 1, 0, + IMPL(TCG_TARGET_HAS_extrl_i64_i32) + | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) +DEF(extrh_i64_i32, 1, 1, 0, + IMPL(TCG_TARGET_HAS_extrh_i64_i32) | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64) diff --git a/tcg/tcg.h b/tcg/tcg.h index 231a781..f437824 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -66,7 +66,8 @@ typedef uint64_t TCGRegSet; #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. 
*/ -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div_i64 0 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_div2_i64 0 diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index cbf3f9b..77e5952 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -84,7 +84,8 @@ #define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -- 2.4.3