On 19/5/26 18:22, James Hilliard wrote:
From: Richard Henderson <[email protected]>
A zero operand produces a zero high and low product. An operand of one
produces a copy of the other operand in the low half and a zero or sign
extension in the high half.
Fold those cases during TCG optimization so wide-multiply idioms used by
target translators can collapse before code generation.
Signed-off-by: Richard Henderson <[email protected]>
---
Changes v7 -> v8:
- New patch from Richard Henderson's v7.5 multiplier rework.
---
tcg/optimize.c | 92 ++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 60 insertions(+), 32 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index b1abec69a5..a42ab16fb4 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2173,45 +2173,73 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
{
swap_commutative(op->args[0], &op->args[2], &op->args[3]);
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
- uint64_t a = arg_const_val(op->args[2]);
+ if (arg_is_const(op->args[3])) {
uint64_t b = arg_const_val(op->args[3]);
- uint64_t h, l;
- TCGArg rl, rh;
+ TCGArg rl = op->args[0];
+ TCGArg rh = op->args[1];
TCGOp *op2;
- switch (op->opc) {
- case INDEX_op_mulu2:
- if (ctx->type == TCG_TYPE_I32) {
- l = (uint64_t)(uint32_t)a * (uint32_t)b;
- h = (int32_t)(l >> 32);
- l = (int32_t)l;
- } else {
- mulu64(&l, &h, a, b);
- }
- break;
- case INDEX_op_muls2:
- if (ctx->type == TCG_TYPE_I32) {
- l = (int64_t)(int32_t)a * (int32_t)b;
- h = l >> 32;
- l = (int32_t)l;
- } else {
- muls64(&l, &h, a, b);
+ if (arg_is_const(op->args[2])) {
+ uint64_t a = arg_const_val(op->args[2]);
+ uint64_t h, l;
+
+ switch (op->opc) {
+ case INDEX_op_mulu2:
+ if (ctx->type == TCG_TYPE_I32) {
+ l = (uint64_t)(uint32_t)a * (uint32_t)b;
+ h = (int32_t)(l >> 32);
+ l = (int32_t)l;
+ } else {
+ mulu64(&l, &h, a, b);
+ }
+ break;
+ case INDEX_op_muls2:
+ if (ctx->type == TCG_TYPE_I32) {
+ l = (int64_t)(int32_t)a * (int32_t)b;
+ h = l >> 32;
+ l = (int32_t)l;
+ } else {
+ muls64(&l, &h, a, b);
+ }
+ break;
+ default:
+ g_assert_not_reached();
}
- break;
- default:
- g_assert_not_reached();
- }
- rl = op->args[0];
- rh = op->args[1];
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
+ op2 = opt_insert_before(ctx, op, 0, 2);
+ tcg_opt_gen_movi(ctx, op2, rl, l);
+ tcg_opt_gen_movi(ctx, op, rh, h);
+ return true;
+ }
- /* The proper opcode is supplied by tcg_opt_gen_mov. */
- op2 = opt_insert_before(ctx, op, 0, 2);
+ if (b == 0) {
+ op2 = opt_insert_before(ctx, op, 0, 2);
+ tcg_opt_gen_movi(ctx, op2, rl, 0);
+ tcg_opt_gen_movi(ctx, op, rh, 0);
+ return true;
+ }
+ if (b == 1) {
+ op2 = opt_insert_before(ctx, op, 0, 2);
+ tcg_opt_gen_mov(ctx, op2, rl, op->args[2]);
+
+ switch (op->opc) {
+ case INDEX_op_mulu2:
+ tcg_opt_gen_movi(ctx, op, rh, 0);
+ break;
+ case INDEX_op_muls2:
+ op->opc = INDEX_op_sar;
+ op->args[0] = rh;
+ op->args[1] = rl;
+ op->args[2] =
+ arg_new_constant(ctx, tcg_type_size(ctx->type) * 8 - 1);
+ break;
+ default:
+ g_assert_not_reached();
+ }
- tcg_opt_gen_movi(ctx, op, rl, l);
- tcg_opt_gen_movi(ctx, op2, rh, h);
- return true;
+ return true;
+ }
}
return finish_folding(ctx, op);
}
Removing the indentation changes, the content of this patch is:
-- >8 --
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 3defc67c853..a42ab16fb4c 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2208,9 +2208,36 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
/* The proper opcode is supplied by tcg_opt_gen_mov. */
op2 = opt_insert_before(ctx, op, 0, 2);
- tcg_opt_gen_movi(ctx, op, rl, l);
- tcg_opt_gen_movi(ctx, op2, rh, h);
+ tcg_opt_gen_movi(ctx, op2, rl, l);
+ tcg_opt_gen_movi(ctx, op, rh, h);
+ return true;
+ }
+
+ if (b == 0) {
+ op2 = opt_insert_before(ctx, op, 0, 2);
+ tcg_opt_gen_movi(ctx, op2, rl, 0);
+ tcg_opt_gen_movi(ctx, op, rh, 0);
+ return true;
+ }
+ if (b == 1) {
+ op2 = opt_insert_before(ctx, op, 0, 2);
+ tcg_opt_gen_mov(ctx, op2, rl, op->args[2]);
+
+ switch (op->opc) {
+ case INDEX_op_mulu2:
+ tcg_opt_gen_movi(ctx, op, rh, 0);
+ break;
+ case INDEX_op_muls2:
+ op->opc = INDEX_op_sar;
+ op->args[0] = rh;
+ op->args[1] = rl;
+ op->args[2] =
+ arg_new_constant(ctx, tcg_type_size(ctx->type) * 8 - 1);
+ break;
+ default:
+ g_assert_not_reached();
+ }
return true;
}
}
---
I don't understand this change:
- tcg_opt_gen_movi(ctx, op, rl, l);
- tcg_opt_gen_movi(ctx, op2, rh, h);
+ tcg_opt_gen_movi(ctx, op2, rl, l);
+ tcg_opt_gen_movi(ctx, op, rh, h);