Similar to ARM64, LoongArch has PC-relative instructions such as PCADDU18I, which can be used to implement direct jumps between translation blocks. Additionally, if the target address is within range of a single "B offset" instruction, "tb_target_set_jmp_target" patches in just that "B offset" (followed by a NOP) instead of the PCADDU18I+JIRL pair.
Signed-off-by: Qi Hu <h...@loongson.cn> --- tcg/loongarch64/tcg-insn-defs.c.inc | 3 ++ tcg/loongarch64/tcg-target.c.inc | 49 ++++++++++++++++++++++++++--- tcg/loongarch64/tcg-target.h | 2 +- 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/tcg/loongarch64/tcg-insn-defs.c.inc b/tcg/loongarch64/tcg-insn-defs.c.inc index d162571856..f5869c6bb1 100644 --- a/tcg/loongarch64/tcg-insn-defs.c.inc +++ b/tcg/loongarch64/tcg-insn-defs.c.inc @@ -112,6 +112,9 @@ typedef enum { OPC_BLE = 0x64000000, OPC_BGTU = 0x68000000, OPC_BLEU = 0x6c000000, + /* pseudo-instruction */ + NOP = 0x03400000, + } LoongArchInsn; static int32_t __attribute__((unused)) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index f5a214a17f..3a7b1df081 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1058,11 +1058,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_goto_tb: - assert(s->tb_jmp_insn_offset == 0); - /* indirect jump method */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, - (uintptr_t)(s->tb_jmp_target_addr + a0)); - tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); + if (s->tb_jmp_insn_offset != NULL) { + /* TCG_TARGET_HAS_direct_jump */ + /* Ensure that PCADD+JIRL are 8-byte aligned so that an atomic + write can be used to patch the target address. */ + if ((uintptr_t)s->code_ptr & 7) { + tcg_out32(s, NOP); + } + s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); + /* actual branch destination will be patched by + tb_target_set_jmp_target later. 
*/ + tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0); + tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); + } else { + /* !TCG_TARGET_HAS_direct_jump */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, + (uintptr_t)(s->tb_jmp_target_addr + a0)); + tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); + } set_jmp_reset_offset(s, a0); break; @@ -1708,6 +1721,32 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED); } +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, + uintptr_t jmp_rw, uintptr_t addr) +{ + tcg_insn_unit i1, i2; + + ptrdiff_t offset = addr - jmp_rx; + + if (offset == sextreg(offset, 0, 28)) { + i1 = OPC_B | ((offset >> 18) & 0x3ff) | ((offset << 8) & 0x3fffc00); + i2 = NOP; + } else { + offset >>= 2; + + ptrdiff_t upper, lower; + upper = ((offset + (1 << 15)) >> 16) & 0xfffff; + lower = (offset & 0xffff); + /* patch pcaddu18i */ + i1 = OPC_PCADDU18I | upper << 5 | TCG_REG_T0; + /* patch jirl */ + i2 = OPC_JIRL | lower << 10 | TCG_REG_T0 << 5; + } + uint64_t pair = (uint64_t)i2 << 32 | i1; + qatomic_set((uint64_t *)jmp_rw, pair); + flush_idcache_range(jmp_rx, jmp_rw, 8); +} + typedef struct { DebugFrameHeader h; uint8_t fde_def_cfa[4]; diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 67380b2432..0e552731f5 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -123,7 +123,7 @@ typedef enum { #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 0 -#define TCG_TARGET_HAS_direct_jump 0 +#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_brcond2 0 #define TCG_TARGET_HAS_setcond2 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -- 2.37.3