Re: [PATCH v4 099/163] tcg: Convert extrl_i64_i32 to TCGOutOpUnary
On 4/15/25 12:24, Richard Henderson wrote:
Drop the cast from TCGv_i64 to TCGv_i32 in tcg_gen_extrl_i64_i32
and emit extrl_i64_i32 unconditionally. Move that special case
to tcg_gen_code when we find out if the output is live or dead.
In this way even hosts that canonicalize truncations can make
use of a store directly from the 64-bit host register.
Signed-off-by: Richard Henderson
---
tcg/tcg-op.c | 4 +---
tcg/tcg.c| 35 +++-
tcg/aarch64/tcg-target.c.inc | 1 -
tcg/i386/tcg-target.c.inc| 4
tcg/loongarch64/tcg-target.c.inc | 2 --
tcg/mips/tcg-target.c.inc| 2 --
tcg/ppc/tcg-target.c.inc | 1 -
tcg/riscv/tcg-target.c.inc | 2 --
tcg/s390x/tcg-target.c.inc | 1 -
tcg/tci/tcg-target.c.inc | 1 -
10 files changed, 31 insertions(+), 22 deletions(-)
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index d3f3c9d248..7ecd1f6c8f 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2962,11 +2962,9 @@ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
{
if (TCG_TARGET_REG_BITS == 32) {
tcg_gen_mov_i32(ret, TCGV_LOW(arg));
-} else if (TCG_TARGET_HAS_extr_i64_i32) {
+} else {
tcg_gen_op2(INDEX_op_extrl_i64_i32, TCG_TYPE_I32,
tcgv_i32_arg(ret), tcgv_i64_arg(arg));
-} else {
-tcg_gen_mov_i32(ret, (TCGv_i32)arg);
}
}
diff --git a/tcg/tcg.c b/tcg/tcg.c
index b6c1efa828..84083d133d 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1093,6 +1093,16 @@ static const TCGOutOpUnary outop_extu_i32_i64 = {
.base.static_constraint = C_O1_I1(r, r),
.out_rr = tgen_extu_i32_i64,
};
+
+static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
+{
+tcg_out_extrl_i64_i32(s, a0, a1);
+}
+
+static const TCGOutOpUnary outop_extrl_i64_i32 = {
+.base.static_constraint = C_O1_I1(r, r),
+.out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL,
+};
#endif
/*
@@ -1151,6 +1161,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = {
OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64),
OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64),
OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64),
+OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32),
#endif
};
@@ -2400,12 +2411,12 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
case INDEX_op_st_i64:
case INDEX_op_ext_i32_i64:
case INDEX_op_extu_i32_i64:
+case INDEX_op_extrl_i64_i32:
case INDEX_op_deposit_i64:
return TCG_TARGET_REG_BITS == 64;
case INDEX_op_extract2_i64:
return TCG_TARGET_HAS_extract2_i64;
-case INDEX_op_extrl_i64_i32:
case INDEX_op_extrh_i64_i32:
return TCG_TARGET_HAS_extr_i64_i32;
case INDEX_op_add2_i64:
@@ -5438,10 +5449,6 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp
*op)
/* emit instruction */
TCGType type = TCGOP_TYPE(op);
switch (op->opc) {
-case INDEX_op_extrl_i64_i32:
-tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
-break;
-
case INDEX_op_add:
case INDEX_op_and:
case INDEX_op_andc:
@@ -5499,6 +5506,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp
*op)
case INDEX_op_bswap64:
case INDEX_op_ext_i32_i64:
case INDEX_op_extu_i32_i64:
+case INDEX_op_extrl_i64_i32:
assert(TCG_TARGET_REG_BITS == 64);
/* fall through */
case INDEX_op_ctpop:
@@ -6657,6 +6665,22 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb,
uint64_t pc_start)
TCGOpcode opc = op->opc;
switch (opc) {
+case INDEX_op_extrl_i64_i32:
+assert(TCG_TARGET_REG_BITS == 64);
+/*
+ * If TCG_TYPE_I32 is represented in some canonical form,
+ * e.g. zero or sign-extended, then emit as a unary op.
+ * Otherwise we can treat this as a plain move.
+ * If the output dies, treat this as a plain move, because
+ * this will be implemented with a store.
+ */
+if (TCG_TARGET_HAS_extr_i64_i32) {
+TCGLifeData arg_life = op->life;
+if (!IS_DEAD_ARG(0)) {
+goto do_default;
+}
+}
+/* fall through */
case INDEX_op_mov:
case INDEX_op_mov_vec:
tcg_reg_alloc_mov(s, op);
@@ -6699,6 +6723,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb,
uint64_t pc_start)
}
/* fall through */
default:
+do_default:
/* Sanity check that we've not introduced any unhandled opcodes. */
tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
TCGOP_FLAGS(op)));
diff --git a/tcg/aarch64/tcg-target.c.i
[PATCH v4 099/163] tcg: Convert extrl_i64_i32 to TCGOutOpUnary
Drop the cast from TCGv_i64 to TCGv_i32 in tcg_gen_extrl_i64_i32
and emit extrl_i64_i32 unconditionally. Move that special case
to tcg_gen_code when we find out if the output is live or dead.
In this way even hosts that canonicalize truncations can make
use of a store directly from the 64-bit host register.
Signed-off-by: Richard Henderson
---
tcg/tcg-op.c | 4 +---
tcg/tcg.c| 35 +++-
tcg/aarch64/tcg-target.c.inc | 1 -
tcg/i386/tcg-target.c.inc| 4
tcg/loongarch64/tcg-target.c.inc | 2 --
tcg/mips/tcg-target.c.inc| 2 --
tcg/ppc/tcg-target.c.inc | 1 -
tcg/riscv/tcg-target.c.inc | 2 --
tcg/s390x/tcg-target.c.inc | 1 -
tcg/tci/tcg-target.c.inc | 1 -
10 files changed, 31 insertions(+), 22 deletions(-)
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index d3f3c9d248..7ecd1f6c8f 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2962,11 +2962,9 @@ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
{
if (TCG_TARGET_REG_BITS == 32) {
tcg_gen_mov_i32(ret, TCGV_LOW(arg));
-} else if (TCG_TARGET_HAS_extr_i64_i32) {
+} else {
tcg_gen_op2(INDEX_op_extrl_i64_i32, TCG_TYPE_I32,
tcgv_i32_arg(ret), tcgv_i64_arg(arg));
-} else {
-tcg_gen_mov_i32(ret, (TCGv_i32)arg);
}
}
diff --git a/tcg/tcg.c b/tcg/tcg.c
index b6c1efa828..84083d133d 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1093,6 +1093,16 @@ static const TCGOutOpUnary outop_extu_i32_i64 = {
.base.static_constraint = C_O1_I1(r, r),
.out_rr = tgen_extu_i32_i64,
};
+
+static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
+{
+tcg_out_extrl_i64_i32(s, a0, a1);
+}
+
+static const TCGOutOpUnary outop_extrl_i64_i32 = {
+.base.static_constraint = C_O1_I1(r, r),
+.out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL,
+};
#endif
/*
@@ -1151,6 +1161,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = {
OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64),
OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64),
OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64),
+OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32),
#endif
};
@@ -2400,12 +2411,12 @@ bool tcg_op_supported(TCGOpcode op, TCGType type,
unsigned flags)
case INDEX_op_st_i64:
case INDEX_op_ext_i32_i64:
case INDEX_op_extu_i32_i64:
+case INDEX_op_extrl_i64_i32:
case INDEX_op_deposit_i64:
return TCG_TARGET_REG_BITS == 64;
case INDEX_op_extract2_i64:
return TCG_TARGET_HAS_extract2_i64;
-case INDEX_op_extrl_i64_i32:
case INDEX_op_extrh_i64_i32:
return TCG_TARGET_HAS_extr_i64_i32;
case INDEX_op_add2_i64:
@@ -5438,10 +5449,6 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp
*op)
/* emit instruction */
TCGType type = TCGOP_TYPE(op);
switch (op->opc) {
-case INDEX_op_extrl_i64_i32:
-tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
-break;
-
case INDEX_op_add:
case INDEX_op_and:
case INDEX_op_andc:
@@ -5499,6 +5506,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp
*op)
case INDEX_op_bswap64:
case INDEX_op_ext_i32_i64:
case INDEX_op_extu_i32_i64:
+case INDEX_op_extrl_i64_i32:
assert(TCG_TARGET_REG_BITS == 64);
/* fall through */
case INDEX_op_ctpop:
@@ -6657,6 +6665,22 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb,
uint64_t pc_start)
TCGOpcode opc = op->opc;
switch (opc) {
+case INDEX_op_extrl_i64_i32:
+assert(TCG_TARGET_REG_BITS == 64);
+/*
+ * If TCG_TYPE_I32 is represented in some canonical form,
+ * e.g. zero or sign-extended, then emit as a unary op.
+ * Otherwise we can treat this as a plain move.
+ * If the output dies, treat this as a plain move, because
+ * this will be implemented with a store.
+ */
+if (TCG_TARGET_HAS_extr_i64_i32) {
+TCGLifeData arg_life = op->life;
+if (!IS_DEAD_ARG(0)) {
+goto do_default;
+}
+}
+/* fall through */
case INDEX_op_mov:
case INDEX_op_mov_vec:
tcg_reg_alloc_mov(s, op);
@@ -6699,6 +6723,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb,
uint64_t pc_start)
}
/* fall through */
default:
+do_default:
/* Sanity check that we've not introduced any unhandled opcodes. */
tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
TCGOP_FLAGS(op)));
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 44314f6a0f..8abc5f26da 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/a
