Tested on i386-softmmu only. TCI can now run Windows XP SP2, and it is
about 6 times slower than the JIT.
--
SUN OF A BEACH
Subject: [PATCH 1/5] tci: fix op_sar_iXX and op_ext16s_iXX
---
tcg/tci.c |6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/tcg/tci.c b/tcg/tci.c
index e467b3a..81c415c 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -206,7 +206,7 @@ static uint16_t tci_read_r16(uint8_t **tb_ptr)
}
/* Read indexed register (16 bit signed) from bytecode. */
-static uint16_t tci_read_r16s(uint8_t **tb_ptr)
+static int16_t tci_read_r16s(uint8_t **tb_ptr)
{
uint16_t value = tci_read_reg16s(**tb_ptr);
*tb_ptr += 1;
@@ -549,7 +549,7 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr)
t0 = *tb_ptr++;
t1 = tci_read_ri32(&tb_ptr);
t2 = tci_read_ri32(&tb_ptr);
-tci_write_reg32(t0, (t1 >> t2) | (t1 & (1UL << 31)));
+tci_write_reg32(t0, ((int32_t)t1 >> t2));
break;
#ifdef TCG_TARGET_HAS_rot_i32
case INDEX_op_rotl_i32:
@@ -794,7 +794,7 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr)
t0 = *tb_ptr++;
t1 = tci_read_ri64(&tb_ptr);
t2 = tci_read_ri64(&tb_ptr);
-tci_write_reg64(t0, (t1 >> t2) | (t1 & (1ULL << 63)));
+tci_write_reg64(t0, ((int64_t)t1 >> t2));
break;
#ifdef TCG_TARGET_HAS_rot_i64
case INDEX_op_rotl_i64:
--
1.6.3.msysgit.0
Subject: [PATCH 2/5] tci: add bswapXX_i32,div_i32 and rot_i32
---
tcg/bytecode/tcg-target.c | 24 +++-
tcg/tci.c | 40 +++-
2 files changed, 58 insertions(+), 6 deletions(-)
diff --git a/tcg/bytecode/tcg-target.c b/tcg/bytecode/tcg-target.c
index 2bd12b8..aae570f 100644
--- a/tcg/bytecode/tcg-target.c
+++ b/tcg/bytecode/tcg-target.c
@@ -722,6 +722,10 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
case INDEX_op_shl_i32:
case INDEX_op_shr_i32:
case INDEX_op_sar_i32:
+#ifdef TCG_TARGET_HAS_rot_i32
+case INDEX_op_rotl_i32:
+case INDEX_op_rotr_i32:
+#endif
tcg_out_op_t(s, opc);
tcg_out_r(s, args[0]);
tcg_out_ri32(s, const_args[1], args[1]);
@@ -816,7 +820,10 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
case INDEX_op_divu_i32:
case INDEX_op_rem_i32:
case INDEX_op_remu_i32:
-TODO();
+tcg_out_op_t(s, opc);
+tcg_out_r(s, args[0]);
+tcg_out_ri32(s, const_args[1], args[1]);
+tcg_out_ri32(s, const_args[2], args[2]);
break;
#else
case INDEX_op_div2_i32:
@@ -1002,6 +1009,21 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
break;
#endif
#endif /* TCG_TARGET_REG_BITS == 64 */
+#if defined(TCG_TARGET_HAS_bswap32_i32)
+case INDEX_op_bswap32_i32:
+tcg_out_op_t(s, opc);
+tcg_out_r(s, args[0]);
+tcg_out_r(s, args[1]);
+break;
+#endif
+#if defined(TCG_TARGET_HAS_bswap16_i32)
+case INDEX_op_bswap16_i32:
+tcg_dump_ops(s, stderr);
+tcg_out_op_t(s, opc);
+tcg_out_r(s, args[0]);
+tcg_out_r(s, args[1]);
+break;
+#endif
case INDEX_op_end:
TODO();
break;
diff --git a/tcg/tci.c b/tcg/tci.c
index 81c415c..8bb78e3 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -503,11 +503,29 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr)
break;
#ifdef TCG_TARGET_HAS_div_i32
case INDEX_op_div_i32:
+t0 = *tb_ptr++;
+t1 = tci_read_ri32(&tb_ptr);
+t2 = tci_read_ri32(&tb_ptr);
+tci_write_reg32(t0, (int32_t)t1 / (int32_t)t2);
+break;
case INDEX_op_divu_i32:
+t0 = *tb_ptr++;
+t1 = tci_read_ri32(&tb_ptr);
+t2 = tci_read_ri32(&tb_ptr);
+tci_write_reg32(t0, t1 / t2);
+break;
case INDEX_op_rem_i32:
+t0 = *tb_ptr++;
+t1 = tci_read_ri32(&tb_ptr);
+t2 = tci_read_ri32(&tb_ptr);
+tci_write_reg32(t0, (int32_t)t1 % (int32_t)t2);
+break;
case INDEX_op_remu_i32:
-TODO();
-break;
+t0 = *tb_ptr++;
+t1 = tci_read_ri32(&tb_ptr);
+t2 = tci_read_ri32(&tb_ptr);
+tci_write_reg32(t0, t1 % t2);
+break;
#else
case INDEX_op_div2_i32:
case INDEX_op_divu2_i32:
@@ -553,8 +571,16 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr)
break;
#ifdef TCG_TARGET_HAS_rot_i32
case INDEX_op_rotl_i32:
+t0 = *tb_ptr++;
+t1 = tci_read_ri32(&tb_ptr);
+t2 = tci_read_ri32(&tb_ptr);
+tci_write_reg32(t0, (t1<<t2)|(t1>>(32-t2)));
+break;
case INDEX_op_rotr_i32:
-TODO();
+t0 = *tb_ptr++;
+t1 = tci_read_ri32(&tb_ptr);
+t2 = tci_read_ri32(&tb_ptr);
+