On 3/21/2023 8:58 PM, liweiwei wrote: > > On 2023/3/21 14:37, fei2...@intel.com wrote: >> From: Fei Wu <fei2...@intel.com> >> >> Kernel needs to access user mode memory e.g. during syscalls, the window >> is usually opened up for a very limited time through MSTATUS.SUM, the >> overhead is too much if tlb_flush() gets called for every SUM change. >> This patch saves addresses accessed when SUM=1, and flushs only these >> pages when SUM changes to 0. If the buffer is not large enough to save >> all the pages during SUM=1, it will fall back to tlb_flush when >> necessary. >> >> The buffer size is set to 4 since in this MSTATUS.SUM open-up window, >> most of the time kernel accesses 1 or 2 pages, it's very rare to see >> more than 4 pages accessed. >> >> It's not necessary to save/restore these new added status, as >> tlb_flush() is always called after restore. >> >> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many >> other syscalls benefit a lot from this one too. >> >> Signed-off-by: Fei Wu <fei2...@intel.com> >> Reviewed-by: LIU Zhiwei <zhiwei_...@linux.alibaba.com> >> --- >> target/riscv/cpu.h | 4 ++++ >> target/riscv/cpu_helper.c | 7 +++++++ >> target/riscv/csr.c | 14 +++++++++++++- >> 3 files changed, 24 insertions(+), 1 deletion(-) >> >> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h >> index 638e47c75a..926dbce59f 100644 >> --- a/target/riscv/cpu.h >> +++ b/target/riscv/cpu.h >> @@ -383,6 +383,10 @@ struct CPUArchState { >> uint64_t kvm_timer_compare; >> uint64_t kvm_timer_state; >> uint64_t kvm_timer_frequency; >> + >> +#define MAX_CACHED_SUM_U_ADDR_NUM 4 >> + uint64_t sum_u_count; >> + uint64_t sum_u_addr[MAX_CACHED_SUM_U_ADDR_NUM]; >> }; >> OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU) >> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c >> index f88c503cf4..5ad0418eb6 100644 >> --- a/target/riscv/cpu_helper.c >> +++ b/target/riscv/cpu_helper.c >> @@ -1068,6 +1068,13 @@ restart: >> (access_type == MMU_DATA_STORE || (pte & PTE_D))) { >> *prot |= PAGE_WRITE; >> } >> + if ((pte & PTE_U) && (mode & PRV_S) && >> + get_field(env->mstatus, MSTATUS_SUM)) { >> + if (env->sum_u_count < MAX_CACHED_SUM_U_ADDR_NUM) { >> + env->sum_u_addr[env->sum_u_count] = addr; >> + } >> + ++env->sum_u_count; >> + } >> return TRANSLATE_SUCCESS; >> } >> } >> diff --git a/target/riscv/csr.c b/target/riscv/csr.c >> index ab566639e5..74b7638c8a 100644 >> --- a/target/riscv/csr.c >> +++ b/target/riscv/csr.c >> @@ -1246,9 +1246,21 @@ static RISCVException >> write_mstatus(CPURISCVState *env, int csrno, >> /* flush tlb on mstatus fields that affect VM */ >> if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV | >> - MSTATUS_MPRV | MSTATUS_SUM)) { >> + MSTATUS_MPRV)) { >> tlb_flush(env_cpu(env)); >> + env->sum_u_count = 0; >> + } else if ((mstatus & MSTATUS_SUM) && !(val & MSTATUS_SUM)) { >> + if (env->sum_u_count > MAX_CACHED_SUM_U_ADDR_NUM) { >> + tlb_flush(env_cpu(env)); > > SUM seems only affect S mode TLB. Maybe we can only flush S mode TLB here. > It's also in effect when MPRV=1 and MPP=S in M mode, we can only flush the tlb of PRV_S and PRV_M.
Thanks, Fei. >> + } else { >> + for (int i = 0; i < env->sum_u_count; ++i) { >> + tlb_flush_page_by_mmuidx(env_cpu(env), >> env->sum_u_addr[i], >> + 1 << PRV_S | 1 << PRV_M); > > Similar case here. > > Regards, > > Weiwei Li > >> + } >> + } >> + env->sum_u_count = 0; >> } >> + >> mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE | >> MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM | >> MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR | >