Richard Henderson <richard.hender...@linaro.org> writes:
> The old ppc64 implementation replaces 2 or 4 insns, which leaves a race > condition in which a thread could be stopped at a PC in the middle of > the sequence, and when restarted does not see the complete address > computation and branches to nowhere. > > The new implemetation replaces only one insn, swapping between > > b <dest> > and > mtctr r31 > > falling through to a general-case indirect branch. > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- <snip> > > static void tcg_out_goto_tb(TCGContext *s, int which) > { > - /* Direct jump. */ > - if (TCG_TARGET_REG_BITS == 64) { > - /* Ensure the next insns are 8 or 16-byte aligned. */ > - while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) { > - tcg_out32(s, NOP); > - } > + uintptr_t ptr = get_jmp_target_addr(s, which); > + > + if (USE_REG_TB) { > + ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr); > + tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset); > + > + /* Direct branch will be patched by tb_target_set_jmp_target. */ > set_jmp_insn_offset(s, which); > - tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0)); > - tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0)); > tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); > + > + /* When branch is out of range, fall through to indirect. */ > + tcg_out32(s, BCCTR | BO_ALWAYS); > + > + /* For the unlinked case, need to reset TCG_REG_TB. */ > + set_jmp_reset_offset(s, which); > + tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, > + -tcg_current_code_size(s)); > + } else { > + /* Direct branch will be patched by tb_target_set_jmp_target. */ > + set_jmp_insn_offset(s, which); > + tcg_out32(s, NOP); > + > + /* When branch is out of range, fall through to indirect. */ > + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr); > + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, > (int16_t)ptr); > + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); > tcg_out32(s, BCCTR | BO_ALWAYS); > set_jmp_reset_offset(s, which); > - if (USE_REG_TB) { > - /* For the unlinked case, need to reset TCG_REG_TB. */ > - tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, > - -tcg_current_code_size(s)); > - } > - } else { > - set_jmp_insn_offset(s, which); > - tcg_out32(s, B); > - set_jmp_reset_offset(s, which); > } > } > > +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, > + uintptr_t jmp_rx, uintptr_t jmp_rw) > +{ > + uintptr_t addr = tb->jmp_target_addr[n]; > + intptr_t diff = addr - jmp_rx; > + tcg_insn_unit insn; > + > + if (in_range_b(diff)) { > + insn = B | (diff & 0x3fffffc); Again deposit would be nice here. > + } else if (USE_REG_TB) { > + insn = MTSPR | RS(TCG_REG_TB) | CTR; > + } else { > + insn = NOP; > + } > + > + qatomic_set((uint32_t *)jmp_rw, insn); > + flush_idcache_range(jmp_rx, jmp_rw, 4); > +} > + > static void tcg_out_op(TCGContext *s, TCGOpcode opc, > const TCGArg args[TCG_MAX_OP_ARGS], > const int const_args[TCG_MAX_OP_ARGS]) Otherwise: Reviewed-by: Alex Bennée <alex.ben...@linaro.org> -- Alex Bennée Virtualisation Tech Lead @ Linaro