On 11/11/2016 08:20 AM, jinguo...@loongson.cn wrote:
From: Jin Guojie <jinguo...@loongson.cn>
This patch implements TCG mips64r2(little-endian) translation backend.
Tested on Loongson 3A2000(a MIPS64-compatible CPU) with Fedora Linux 21 Remix.
linux-0.2.img.bz2 runs well.
The performance is nearly 10 times higher than tci mode.
https://en.wikipedia.org/wiki/Loongson
http://www.loongnix.org/index.php/Loongnix
Cc: Aurelien Jarno <aurel...@aurel32.net>
Signed-off-by: Jin Guojie <jinguo...@loongson.cn>
Have you seen
https://lists.nongnu.org/archive/html/qemu-devel/2016-02/msg01910.html
? I know there are bugs in that patch set, but I would like any mips64 support
to look like that. In particular, reduce the use of #if to an absolute minimum.
+#if UINTPTR_MAX == UINT32_MAX
+# define TCG_TARGET_REG_BITS 32
+#elif UINTPTR_MAX == UINT64_MAX
+# define TCG_TARGET_REG_BITS 64
+#endif
There are two mips64 abi's. You're only allowing 64-bit code to be generated
for n64, and not n32.
+#undef use_movnz_instructions
+#undef use_mips32_instructions
+#undef use_mips32r6_instructions
+
+#define use_movnz_instructions 0
+#define use_mips32_instructions 0
+#define use_mips32r6_instructions 0
Why? Certainly we should be able to generate code for mips64r2 and mips64r6.
+#if TCG_TARGET_REG_BITS == 64
+static const TCGReg tcg_target_call_oarg_regs[1] = {
+ TCG_REG_V0,
+};
+#else
static const TCGReg tcg_target_call_oarg_regs[2] = {
TCG_REG_V0,
TCG_REG_V1
};
+#endif
This change would be incorrect if we ever enhance tcg to handle __int128_t. In
the meantime it doesn't matter, and can be left unchanged.
@@ -459,7 +502,15 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type,
{
/* Simple reg-reg move, optimising out the 'do nothing' case */
if (ret != arg) {
+#if TCG_TARGET_REG_BITS == 64
+ if (type == TCG_TYPE_I32) {
+ tcg_out_opc_reg(s, OPC_ADDU, ret, arg, TCG_REG_ZERO);
+ } else {
+ tcg_out_opc_reg(s, OPC_DADDU, ret, arg, TCG_REG_ZERO);
+ }
+#else
tcg_out_opc_reg(s, OPC_ADDU, ret, arg, TCG_REG_ZERO);
+#endif
}
This is why a proper mips assembler uses OPC_OR.
}
@@ -470,12 +521,21 @@ static inline void tcg_out_movi(TCGContext *s, TCGType
type,
tcg_out_opc_imm(s, OPC_ADDIU, reg, TCG_REG_ZERO, arg);
} else if (arg == (uint16_t)arg) {
tcg_out_opc_imm(s, OPC_ORI, reg, TCG_REG_ZERO, arg);
- } else {
+ } else if (arg == (int32_t)arg) {
tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, arg >> 16);
if (arg & 0xffff) {
tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff);
}
}
+#if TCG_TARGET_REG_BITS == 64
+ /* 64-bit imm */
+ else {
+ tcg_out_opc_imm(s, OPC_LUI, reg, 0, (arg >> 32) & 0xffff);
+ tcg_out_opc_imm(s, OPC_ORI, reg, reg, (arg >> 16) & 0xffff);
+ tcg_out_opc_imm_64(s, OPC_DSLL, reg, reg, 16);
+ tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff);
+ }
+#endif
This is only a 48-bit immediate.
}
static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg)
@@ -566,7 +626,11 @@ static void tcg_out_ldst(TCGContext *s, MIPSInsn opc,
TCGReg data,
if (ofs != lo) {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo);
if (addr != TCG_REG_ZERO) {
+#if TCG_TARGET_REG_BITS == 64
+ tcg_out_opc_reg(s, OPC_DADDU, TCG_TMP0, TCG_TMP0, addr);
+#else
tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, TCG_TMP0, addr);
+#endif
See my patchset where I introduce OPC_PADDU to avoid this and other similar
ifdefs.
@@ -1163,6 +1276,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s,
TCGLabelQemuLdst *l)
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
v0 = l->datalo_reg;
+#if TCG_TARGET_REG_BITS == 32
if ((opc & MO_SIZE) == MO_64) {
/* We eliminated V0 from the possible output registers, so it
cannot be clobbered here. So we must move V1 first. */
@@ -1173,11 +1287,21 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s,
TCGLabelQemuLdst *l)
tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1);
}
}
+#endif
reloc_pc16(s->code_ptr, l->raddr);
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
/* delay slot */
+#if TCG_TARGET_REG_BITS == 32
tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0);
+#else
+ /* ext unsigned long(32) -> 64-bit */
+ if ((opc & MO_SIZE) == MO_32) {
+ tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V0);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0);
+ }
+#endif
This is incorrect, as you're not passing down whether the operation is a 32-bit
load into a 32-bit temporary, or a 32-bit load into a 64-bit temporary. I.e.
the difference between
unsigned int x = *(unsigned int *)ptr;
and
unsigned long long x = *(unsigned int *)ptr;
r~