On 20.02.15 13:45, Paolo Bonzini wrote: > PowerPC TCG flushes the TLB on every IR/DR change, which basically > means on every user<->kernel context switch. Encode IR/DR in the > MMU index. > > This brings the number of TLB flushes down from ~900000 to ~50000 > for starting up the Debian installer, which is in line with x86 > and gives a ~10% performance improvement. > > Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> > --- > target-ppc/cpu.h | 7 ++----- > target-ppc/excp_helper.c | 3 --- > target-ppc/helper_regs.h | 11 ++++++----- > 3 files changed, 8 insertions(+), 13 deletions(-) > > diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h > index aae33a9..610d884 100644 > --- a/target-ppc/cpu.h > +++ b/target-ppc/cpu.h > @@ -943,7 +943,8 @@ struct ppc_segment_page_sizes { > > > /*****************************************************************************/ > /* The whole PowerPC CPU context */ > -#define NB_MMU_MODES 3 > +#define NB_MMU_MODES 12 > +#define MMU_USER_IDX 3 /* PR=IR=DR=1 */ > > #define PPC_CPU_OPCODES_LEN 0x40 > #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20 > @@ -1252,10 +1253,6 @@ static inline CPUPPCState *cpu_init(const char > *cpu_model) > #define cpu_list ppc_cpu_list > > /* MMU modes definitions */ > -#define MMU_MODE0_SUFFIX _user > -#define MMU_MODE1_SUFFIX _kernel > -#define MMU_MODE2_SUFFIX _hypv > -#define MMU_USER_IDX 0 > static inline int cpu_mmu_index (CPUPPCState *env) > { > return env->mmu_idx; > diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c > index b803475..f608701 100644 > --- a/target-ppc/excp_helper.c > +++ b/target-ppc/excp_helper.c > @@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int > excp_model, int excp) > > if (env->spr[SPR_LPCR] & LPCR_AIL) { > new_msr |= (1 << MSR_IR) | (1 << MSR_DR); > - } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) { > - /* If we disactivated any translation, flush TLBs */ > - tlb_flush(cs, 1); > } > > #ifdef TARGET_PPC64 > diff --git a/target-ppc/helper_regs.h 
b/target-ppc/helper_regs.h > index 271fddf..23b8ded 100644 > --- a/target-ppc/helper_regs.h > +++ b/target-ppc/helper_regs.h > @@ -41,12 +41,15 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env) > > static inline void hreg_compute_mem_idx(CPUPPCState *env) > { > + int high; > + > /* Precompute MMU index */ > if (msr_pr == 0 && msr_hv != 0) { > - env->mmu_idx = 2; > + high = 2;
Could you instead do something like uint32_t mmu_idx = 0; mmu_idx |= MMU_IDX_HV; > } else { > - env->mmu_idx = 1 - msr_pr; > + high = 1 - msr_pr; mmu_idx |= (msr_pr & 1) ? MMU_IDX_PR : 0; > } > + env->mmu_idx = (high << 2) | (msr_ir << 1) | msr_dr; mmu_idx |= (msr_ir & 1) ? MMU_IDX_IR : 0; mmu_idx |= (msr_dr & 1) ? MMU_IDX_DR : 0; env->mmu_idx = mmu_idx; and check whether the compiler is smart enough to optimize this out considering that it's all constants? Also please double-check that 440 still works. That was the target that gave me the most headaches on DR/IR switching so far. Otherwise looks simple and clean to me :). Alex > } > > static inline void hreg_compute_hflags(CPUPPCState *env) > @@ -56,7 +59,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env) > /* We 'forget' FE0 & FE1: we'll never generate imprecise exceptions */ > hflags_mask = (1 << MSR_VR) | (1 << MSR_AP) | (1 << MSR_SA) | > (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) | > - (1 << MSR_LE) | (1 << MSR_VSX); > + (1 << MSR_LE) | (1 << MSR_VSX) | (1 << MSR_IR) | (1 << MSR_DR); > hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB; > hreg_compute_mem_idx(env); > env->hflags = env->msr & hflags_mask; > @@ -82,8 +85,6 @@ static inline int hreg_store_msr(CPUPPCState *env, > target_ulong value, > } > if (((value >> MSR_IR) & 1) != msr_ir || > ((value >> MSR_DR) & 1) != msr_dr) { > - /* Flush all tlb when changing translation mode */ > - tlb_flush(cs, 1); > excp = POWERPC_EXCP_NONE; > cs->interrupt_request |= CPU_INTERRUPT_EXITTB; > } >