Use extract2 to implement rotri. To make this easier, redefine rotli in terms of rotri, rather than the reverse.
Signed-off-by: Richard Henderson <[email protected]> --- tcg/tcg-op.c | 52 ++++++++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 8a4fd14ad5..078adce610 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -826,23 +826,12 @@ void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 32); - /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i32(ret, arg1); } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I32, 0)) { - TCGv_i32 t0 = tcg_constant_i32(arg2); - tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, t0); - } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I32, 0)) { - TCGv_i32 t0 = tcg_constant_i32(32 - arg2); - tcg_gen_op3_i32(INDEX_op_rotr, ret, arg1, t0); + tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, tcg_constant_i32(arg2)); } else { - TCGv_i32 t0 = tcg_temp_ebb_new_i32(); - TCGv_i32 t1 = tcg_temp_ebb_new_i32(); - tcg_gen_shli_i32(t0, arg1, arg2); - tcg_gen_shri_i32(t1, arg1, 32 - arg2); - tcg_gen_or_i32(ret, t0, t1); - tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); + tcg_gen_rotri_i32(ret, arg1, -arg2 & 31); } } @@ -870,7 +859,16 @@ void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 32); - tcg_gen_rotli_i32(ret, arg1, -arg2 & 31); + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_rotr, ret, arg1, tcg_constant_i32(arg2)); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, tcg_constant_i32(32 - arg2)); + } else { + /* Do not recurse with the rotri simplification. */ + tcg_gen_op4i_i32(INDEX_op_extract2, ret, arg1, arg1, arg2); + } } void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, @@ -2042,23 +2040,12 @@ void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 64); - /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i64(ret, arg1); } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) { - TCGv_i64 t0 = tcg_constant_i64(arg2); - tcg_gen_op3_i64(INDEX_op_rotl, ret, arg1, t0); - } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I64, 0)) { - TCGv_i64 t0 = tcg_constant_i64(64 - arg2); - tcg_gen_op3_i64(INDEX_op_rotr, ret, arg1, t0); + tcg_gen_op3_i64(INDEX_op_rotl, ret, arg1, tcg_constant_i64(arg2)); } else { - TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - tcg_gen_shli_i64(t0, arg1, arg2); - tcg_gen_shri_i64(t1, arg1, 64 - arg2); - tcg_gen_or_i64(ret, t0, t1); - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); + tcg_gen_rotri_i64(ret, arg1, -arg2 & 63); } } @@ -2086,7 +2073,16 @@ void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 64); - tcg_gen_rotli_i64(ret, arg1, -arg2 & 63); + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_rotr, ret, arg1, tcg_constant_i64(arg2)); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_rotl, ret, arg1, tcg_constant_i64(64 - arg2)); + } else { + /* Do not recurse with the rotri simplification. */ + tcg_gen_op4i_i64(INDEX_op_extract2, ret, arg1, arg1, arg2); + } } void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, -- 2.43.0
