From: Richard Henderson <[email protected]> A zero operand produces a zero high and low product. An operand of one produces a copy of the other operand in the low half and a zero or sign extension of it in the high half.
Fold those cases during TCG optimization so wide-multiply idioms used by target translators can collapse before code generation. Signed-off-by: Richard Henderson <[email protected]> --- Changes v9 -> v10: - Restore the original constant-fold output ordering. Changes v7 -> v8: - New patch from Richard Henderson's v7.5 multiplier rework. --- tcg/optimize.c | 92 ++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 32 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index b1abec69a5..fcdef25bee 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2173,45 +2173,73 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) { swap_commutative(op->args[0], &op->args[2], &op->args[3]); - if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { - uint64_t a = arg_const_val(op->args[2]); + if (arg_is_const(op->args[3])) { uint64_t b = arg_const_val(op->args[3]); - uint64_t h, l; - TCGArg rl, rh; + TCGArg rl = op->args[0]; + TCGArg rh = op->args[1]; TCGOp *op2; - switch (op->opc) { - case INDEX_op_mulu2: - if (ctx->type == TCG_TYPE_I32) { - l = (uint64_t)(uint32_t)a * (uint32_t)b; - h = (int32_t)(l >> 32); - l = (int32_t)l; - } else { - mulu64(&l, &h, a, b); - } - break; - case INDEX_op_muls2: - if (ctx->type == TCG_TYPE_I32) { - l = (int64_t)(int32_t)a * (int32_t)b; - h = l >> 32; - l = (int32_t)l; - } else { - muls64(&l, &h, a, b); + if (arg_is_const(op->args[2])) { + uint64_t a = arg_const_val(op->args[2]); + uint64_t h, l; + + switch (op->opc) { + case INDEX_op_mulu2: + if (ctx->type == TCG_TYPE_I32) { + l = (uint64_t)(uint32_t)a * (uint32_t)b; + h = (int32_t)(l >> 32); + l = (int32_t)l; + } else { + mulu64(&l, &h, a, b); + } + break; + case INDEX_op_muls2: + if (ctx->type == TCG_TYPE_I32) { + l = (int64_t)(int32_t)a * (int32_t)b; + h = l >> 32; + l = (int32_t)l; + } else { + muls64(&l, &h, a, b); + } + break; + default: + g_assert_not_reached(); } - break; - default: - g_assert_not_reached(); - } - rl = op->args[0]; - 
rh = op->args[1]; + /* The proper opcode is supplied by tcg_opt_gen_mov. */ + op2 = opt_insert_before(ctx, op, 0, 2); + tcg_opt_gen_movi(ctx, op, rl, l); + tcg_opt_gen_movi(ctx, op2, rh, h); + return true; + } - /* The proper opcode is supplied by tcg_opt_gen_mov. */ - op2 = opt_insert_before(ctx, op, 0, 2); + if (b == 0) { + op2 = opt_insert_before(ctx, op, 0, 2); + tcg_opt_gen_movi(ctx, op2, rl, 0); + tcg_opt_gen_movi(ctx, op, rh, 0); + return true; + } + if (b == 1) { + op2 = opt_insert_before(ctx, op, 0, 2); + tcg_opt_gen_mov(ctx, op2, rl, op->args[2]); + + switch (op->opc) { + case INDEX_op_mulu2: + tcg_opt_gen_movi(ctx, op, rh, 0); + break; + case INDEX_op_muls2: + op->opc = INDEX_op_sar; + op->args[0] = rh; + op->args[1] = rl; + op->args[2] = + arg_new_constant(ctx, tcg_type_size(ctx->type) * 8 - 1); + break; + default: + g_assert_not_reached(); + } - tcg_opt_gen_movi(ctx, op, rl, l); - tcg_opt_gen_movi(ctx, op2, rh, h); - return true; + return true; + } } return finish_folding(ctx, op); } -- 2.54.0
