On Wed, 22 Apr 2020 at 05:33, Richard Henderson <richard.hender...@linaro.org> wrote: > > This new interface will allow targets to probe for a page > and then handle watchpoints themselves. This will be most > useful for vector predicated memory operations, where one > page lookup can be used for many operations, and one test > can avoid many watchpoint checks. > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > v2: Fix return of host pointer in softmmu probe_access_flags. > --- > include/exec/cpu-all.h | 13 ++- > include/exec/exec-all.h | 22 +++++ > accel/tcg/cputlb.c | 177 ++++++++++++++++++++-------------------- > accel/tcg/user-exec.c | 36 +++++--- > 4 files changed, 149 insertions(+), 99 deletions(-) > > diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h > index 49384bb66a..43ddcf024c 100644 > --- a/include/exec/cpu-all.h > +++ b/include/exec/cpu-all.h > @@ -328,7 +328,18 @@ CPUArchState *cpu_copy(CPUArchState *env); > | CPU_INTERRUPT_TGT_EXT_3 \ > | CPU_INTERRUPT_TGT_EXT_4) > > -#if !defined(CONFIG_USER_ONLY) > +#ifdef CONFIG_USER_ONLY > + > +/* > + * Allow some level of source compatibility with softmmu. We do not > + * support any of the more exotic features, so only invalid pages may > + * be signaled by probe_access_flags(). > + */ > +#define TLB_INVALID_MASK (1 << (TARGET_PAGE_BITS_MIN - 1)) > +#define TLB_MMIO 0 > +#define TLB_WATCHPOINT 0 > + > +#else > > /* > * Flags stored in the low bits of the TLB virtual address. > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index d656a1f05c..8792bea07a 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -362,6 +362,28 @@ static inline void *probe_read(CPUArchState *env, > target_ulong addr, int size, > return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); > } > > +/** > + * probe_access_flags: > + * @env: CPUArchState > + * @addr: guest virtual address to look up > + * @access_type: read, write or execute permission > + * @mmu_idx: MMU index to use for lookup > + * @nonfault: suppress the fault > + * @phost: return value for host address > + * @retaddr: return address for unwinding > + * > + * Similar to probe_access, loosely returning the TLB_FLAGS_MASK for > + * the page, and storing the host address for RAM in @phost. > + * > + * If @nonfault is set, do not raise an exception but return > TLB_INVALID_MASK. > + * Do not handle watchpoints, but include TLB_WATCHPOINT in the returned > flags. > + * Do handle clean pages, so exclude TLB_NOTDIRY from the returned flags.
"TLB_NOTDIRTY" > + * For simplicity, all "mmio-like" flags are folded to TLB_MMIO. > + */ > +int probe_access_flags(CPUArchState *env, target_ulong addr, > + MMUAccessType access_type, int mmu_idx, > + bool nonfault, void **phost, uintptr_t retaddr); > + > #define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache > line */ > > /* Estimated block size for TB allocation. */ > diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c > index e3b5750c3b..bbe265ce28 100644 > --- a/accel/tcg/cputlb.c > +++ b/accel/tcg/cputlb.c > @@ -1231,86 +1231,16 @@ static void notdirty_write(CPUState *cpu, vaddr > mem_vaddr, unsigned size, > } > } > > -/* > - * Probe for whether the specified guest access is permitted. If it is not > - * permitted then an exception will be taken in the same way as if this > - * were a real access (and we will not return). > - * If the size is 0 or the page requires I/O access, returns NULL; otherwise, > - * returns the address of the host page similar to tlb_vaddr_to_host(). > - */ > -void *probe_access(CPUArchState *env, target_ulong addr, int size, > - MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) > +static int probe_access_internal(CPUArchState *env, target_ulong addr, > + int fault_size, MMUAccessType access_type, > + int mmu_idx, bool nonfault, > + void **phost, uintptr_t retaddr) > { > uintptr_t index = tlb_index(env, mmu_idx, addr); > CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); > - target_ulong tlb_addr; > - size_t elt_ofs; > - int wp_access; > - > - g_assert(-(addr | TARGET_PAGE_MASK) >= size); > - > - switch (access_type) { > - case MMU_DATA_LOAD: > - elt_ofs = offsetof(CPUTLBEntry, addr_read); > - wp_access = BP_MEM_READ; > - break; > - case MMU_DATA_STORE: > - elt_ofs = offsetof(CPUTLBEntry, addr_write); > - wp_access = BP_MEM_WRITE; > - break; > - case MMU_INST_FETCH: > - elt_ofs = offsetof(CPUTLBEntry, addr_code); > - wp_access = BP_MEM_READ; > - break; > - default: > - g_assert_not_reached(); > - } > - tlb_addr = tlb_read_ofs(entry, elt_ofs); > - > - if (unlikely(!tlb_hit(tlb_addr, addr))) { > - if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, > - addr & TARGET_PAGE_MASK)) { > - tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, > retaddr); > - /* TLB resize via tlb_fill may have moved the entry. */ > - index = tlb_index(env, mmu_idx, addr); > - entry = tlb_entry(env, mmu_idx, addr); > - } > - tlb_addr = tlb_read_ofs(entry, elt_ofs); > - } > - > - if (!size) { > - return NULL; > - } > - > - if (unlikely(tlb_addr & TLB_FLAGS_MASK)) { > - CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; > - > - /* Reject I/O access, or other required slow-path. */ > - if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) { > - return NULL; > - } > - > - /* Handle watchpoints. */ > - if (tlb_addr & TLB_WATCHPOINT) { > - cpu_check_watchpoint(env_cpu(env), addr, size, > - iotlbentry->attrs, wp_access, retaddr); > - } > - > - /* Handle clean RAM pages. */ > - if (tlb_addr & TLB_NOTDIRTY) { > - notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); > - } > - } > - > - return (void *)((uintptr_t)addr + entry->addend); > -} > - > -void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, > - MMUAccessType access_type, int mmu_idx) > -{ > - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); > - target_ulong tlb_addr, page; > + target_ulong tlb_addr, page_addr; > size_t elt_ofs; > + int flags; > > switch (access_type) { > case MMU_DATA_LOAD: > @@ -1325,20 +1255,19 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr > addr, > default: > g_assert_not_reached(); > } > - > - page = addr & TARGET_PAGE_MASK; > tlb_addr = tlb_read_ofs(entry, elt_ofs); > > - if (!tlb_hit_page(tlb_addr, page)) { > - uintptr_t index = tlb_index(env, mmu_idx, addr); > - > - if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) { > + page_addr = addr & TARGET_PAGE_MASK; > + if (!tlb_hit_page(tlb_addr, page_addr)) { > + if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) { > CPUState *cs = env_cpu(env); > CPUClass *cc = CPU_GET_CLASS(cs); > > - if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) { > + if (!cc->tlb_fill(cs, addr, fault_size, access_type, > + mmu_idx, nonfault, retaddr)) { > /* Non-faulting page table read failed. */ > - return NULL; > + *phost = NULL; > + return TLB_INVALID_MASK; > } > > /* TLB resize via tlb_fill may have moved the entry. */ > @@ -1346,15 +1275,89 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr > addr, > } > tlb_addr = tlb_read_ofs(entry, elt_ofs); > } > + flags = tlb_addr & TLB_FLAGS_MASK; > > - if (tlb_addr & ~TARGET_PAGE_MASK) { > - /* IO access */ > + /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */ > + if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) { > + *phost = NULL; > + return TLB_MMIO; > + } > + > + /* Everything else is RAM. */ > + *phost = (void *)((uintptr_t)addr + entry->addend); > + return flags; > +} > + > +int probe_access_flags(CPUArchState *env, target_ulong addr, > + MMUAccessType access_type, int mmu_idx, > + bool nonfault, void **phost, uintptr_t retaddr) > +{ > + int flags; > + > + flags = probe_access_internal(env, addr, 0, access_type, mmu_idx, > + nonfault, phost, retaddr); > + > + /* Handle clean RAM pages. */ > + if (unlikely(flags & TLB_NOTDIRTY)) { > + uintptr_t index = tlb_index(env, mmu_idx, addr); > + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; > + > + notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr); > + flags &= ~TLB_NOTDIRTY; > + } probe_access() handles watchpoints. Why doesn't probe_access_flags() have to do that? > + > + return flags; > +} > + > +void *probe_access(CPUArchState *env, target_ulong addr, int size, > + MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) > +{ > + void *host; > + int flags; > + > + g_assert(-(addr | TARGET_PAGE_MASK) >= size); > + > + flags = probe_access_internal(env, addr, size, access_type, mmu_idx, > + false, &host, retaddr); > + > + /* Per the interface, size == 0 merely faults the access. */ > + if (size == 0) { > return NULL; > } > > - return (void *)((uintptr_t)addr + entry->addend); > + if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) { > + uintptr_t index = tlb_index(env, mmu_idx, addr); > + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; > + > + /* Handle clean RAM pages. */ > + if (flags & TLB_NOTDIRTY) { > + notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr); > + } > + > + /* Handle watchpoints. */ > + if (flags & TLB_WATCHPOINT) { > + int wp_access = (access_type == MMU_DATA_STORE > + ? BP_MEM_WRITE : BP_MEM_READ); > + cpu_check_watchpoint(env_cpu(env), addr, size, > + iotlbentry->attrs, wp_access, retaddr); > + } The old code checked for watchpoints first, and then handled notdirty-writes, which seems like the more correct order. Why has the new version switched them around? (Incidentally notdirty_write() is rather misleadingly named, since it doesn't actually do a write, it just marks a notdirty page as dirty.) > + } > + > + return host; The probe_access_internal() doc comment doesn't say that it guarantees to set host to NULL for the TLB_MMIO/TLB_INVALID_MASK cases, but we implicitly rely on it here. > } > > +void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, > + MMUAccessType access_type, int mmu_idx) > +{ > + void *host; > + int flags; > + > + flags = probe_access_internal(env, addr, 0, access_type, > + mmu_idx, true, &host, 0); > + > + /* No combination of flags are expected by the caller. */ > + return flags ? NULL : host; > +} > > #ifdef CONFIG_PLUGIN > /* > diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c > index 4be78eb9b3..91c2dc46dd 100644 > --- a/accel/tcg/user-exec.c > +++ b/accel/tcg/user-exec.c > @@ -190,13 +190,12 @@ static inline int handle_cpu_signal(uintptr_t pc, > siginfo_t *info, > g_assert_not_reached(); > } > > -void *probe_access(CPUArchState *env, target_ulong addr, int size, > - MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) > +int probe_access_flags(CPUArchState *env, target_ulong addr, > + MMUAccessType access_type, int mmu_idx, > + bool nonfault, void **phost, uintptr_t ra) > { > int flags; > > - g_assert(-(addr | TARGET_PAGE_MASK) >= size); > - > switch (access_type) { > case MMU_DATA_STORE: > flags = PAGE_WRITE; > @@ -211,15 +210,30 @@ void *probe_access(CPUArchState *env, target_ulong > addr, int size, > g_assert_not_reached(); > } > > - if (!guest_addr_valid(addr) || page_check_range(addr, size, flags) < 0) { > - CPUState *cpu = env_cpu(env); > - CPUClass *cc = CPU_GET_CLASS(cpu); > - cc->tlb_fill(cpu, addr, size, access_type, MMU_USER_IDX, false, > - retaddr); > - g_assert_not_reached(); > + if (!guest_addr_valid(addr) || page_check_range(addr, 1, flags) < 0) { > + if (nonfault) { > + *phost = NULL; > + return TLB_INVALID_MASK; > + } else { > + CPUState *cpu = env_cpu(env); > + CPUClass *cc = CPU_GET_CLASS(cpu); > + cc->tlb_fill(cpu, addr, 0, access_type, MMU_USER_IDX, false, ra); > + g_assert_not_reached(); > + } > } > > - return size ? g2h(addr) : NULL; > + *phost = g2h(addr); > + return 0; > +} > + > +void *probe_access(CPUArchState *env, target_ulong addr, int size, > + MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) > +{ > + void *host; > + > + g_assert(-(addr | TARGET_PAGE_MASK) >= size); > + probe_access_flags(env, addr, access_type, mmu_idx, false, &host, > retaddr); > + return host; The old code returned NULL for a zero size; the new version does not. The old code passed size into cc->tlb_fill; the new version does not. The old code passed size into page_check_range(); the new version does not. thanks -- PMM