The expansions that we chose in tcg-op.c may be less than optimal. Delay lowering until optimize, so that we have propagated constants and have computed known zero/one masks.
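
As a rough illustration of why optimize is the better place for this (a standalone sketch, not QEMU code; extract2_64 and the mask names below are hypothetical): extract2(lo, hi, ofs) yields the low word of the double-width value (hi:lo) shifted right by ofs, so once the known-zero masks show that one input contributes no bits, the operation degenerates to a single shift.

/* Standalone sketch: how a known-zero mask collapses extract2 to a shift. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Reference semantics of a 64-bit extract2 for 0 < ofs < 64. */
static uint64_t extract2_64(uint64_t lo, uint64_t hi, int ofs)
{
    return (lo >> ofs) | (hi << (64 - ofs));
}

int main(void)
{
    uint64_t lo = 0x1234567890abcdefull;
    uint64_t hi = 0;                    /* e.g. produced by a movi 0 */
    int ofs = 8;

    uint64_t z_hi = hi;                 /* bits of hi that may be 1 */

    if ((z_hi << (64 - ofs)) == 0) {
        /* High input provably contributes nothing: plain right shift. */
        printf("lowered: %#018" PRIx64 "\n", lo >> ofs);
    } else {
        printf("general: %#018" PRIx64 "\n", extract2_64(lo, hi, ofs));
    }
    return 0;
}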
Reviewed-by: Manos Pitsidianakis <[email protected]>
Signed-off-by: Richard Henderson <[email protected]>
---
 tcg/optimize.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++----
 tcg/tcg-op.c   |  9 ++------
 2 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 890c8068fb..e6a16921c9 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1933,21 +1933,74 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
     uint64_t z2 = t2->z_mask;
     uint64_t o1 = t1->o_mask;
     uint64_t o2 = t2->o_mask;
+    uint64_t zr, or;
     int shr = op->args[3];
+    int shl;
 
     if (ctx->type == TCG_TYPE_I32) {
         z1 = (uint32_t)z1 >> shr;
         o1 = (uint32_t)o1 >> shr;
-        z2 = (uint64_t)((int32_t)z2 << (32 - shr));
-        o2 = (uint64_t)((int32_t)o2 << (32 - shr));
+        shl = 32 - shr;
+        z2 = (uint64_t)((int32_t)z2 << shl);
+        o2 = (uint64_t)((int32_t)o2 << shl);
     } else {
         z1 >>= shr;
         o1 >>= shr;
-        z2 <<= 64 - shr;
-        o2 <<= 64 - shr;
+        shl = 64 - shr;
+        z2 <<= shl;
+        o2 <<= shl;
+    }
+    zr = z1 | z2;
+    or = o1 | o2;
+
+    if (zr == or) {
+        return tcg_opt_gen_movi(ctx, op, op->args[0], zr);
     }
 
-    return fold_masks_zo(ctx, op, z1 | z2, o1 | o2);
+    if (z2 == 0) {
+        /* High part zeros folds to simple right shift. */
+        op->opc = INDEX_op_shr;
+        op->args[2] = arg_new_constant(ctx, shr);
+    } else if (z1 == 0) {
+        /* Low part zeros folds to simple left shift. */
+        op->opc = INDEX_op_shl;
+        op->args[1] = op->args[2];
+        op->args[2] = arg_new_constant(ctx, shl);
+    } else if (!tcg_op_supported(INDEX_op_extract2, ctx->type, 0)) {
+        TCGArg tmp = arg_new_temp(ctx);
+        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
+
+        op2->args[0] = tmp;
+        op2->args[1] = op->args[1];
+        op2->args[2] = arg_new_constant(ctx, shr);
+
+        if (TCG_TARGET_deposit_valid(ctx->type, shl, shr)) {
+            /*
+             * Deposit has more arguments than extract2,
+             * so we need to create a new TCGOp.
+             */
+            op2 = opt_insert_before(ctx, op, INDEX_op_deposit, 5);
+            op2->args[0] = op->args[0];
+            op2->args[1] = tmp;
+            op2->args[2] = op->args[2];
+            op2->args[3] = shl;
+            op2->args[4] = shr;
+
+            tcg_op_remove(ctx->tcg, op);
+            op = op2;
+        } else {
+            op2 = opt_insert_before(ctx, op, INDEX_op_shl, 3);
+            op2->args[0] = op->args[0];
+            op2->args[1] = op->args[2];
+            op2->args[2] = arg_new_constant(ctx, shl);
+
+            op->opc = INDEX_op_or;
+            op->args[1] = op->args[0];
+            op->args[2] = tmp;
+        }
+    }
+
+    return fold_masks_zo(ctx, op, zr, or);
 }
 
 static bool fold_exts(OptContext *ctx, TCGOp *op)
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 96f72ba381..8a4fd14ad5 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -1000,13 +1000,8 @@ void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah,
         tcg_gen_mov_i32(ret, ah);
     } else if (al == ah) {
         tcg_gen_rotri_i32(ret, al, ofs);
-    } else if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I32, 0)) {
-        tcg_gen_op4i_i32(INDEX_op_extract2, ret, al, ah, ofs);
     } else {
-        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
-        tcg_gen_shri_i32(t0, al, ofs);
-        tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs);
-        tcg_temp_free_i32(t0);
+        tcg_gen_op4i_i32(INDEX_op_extract2, ret, al, ah, ofs);
     }
 }
 
@@ -2221,7 +2216,7 @@ void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah,
         tcg_gen_mov_i64(ret, ah);
     } else if (al == ah) {
         tcg_gen_rotri_i64(ret, al, ofs);
-    } else if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I64, 0)) {
+    } else if (TCG_TARGET_REG_BITS == 64) {
         tcg_gen_op4i_i64(INDEX_op_extract2, ret, al, ah, ofs);
     } else {
         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
-- 
2.43.0
