From: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com> With kvm enabled, we store the hash page table information in the hypervisor. Use ioctl to read the htab contents. Without this we get the below error when trying to read the guest address
(gdb) x/10 do_fork 0xc000000000098660 <do_fork>: Cannot access memory at address 0xc000000000098660 (gdb) [ folded fixes for 32 bit build (casts!), ldq_phys() API change, Greg Kurz <gk...@linux.vnet.ibm.com ] Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com> Signed-off-by: Greg Kurz <gk...@linux.vnet.ibm.com> Signed-off-by: Alexander Graf <ag...@suse.de> --- hw/ppc/spapr.c | 1 + hw/ppc/spapr_hcall.c | 50 +++++++++++++++++++----------- target-ppc/kvm.c | 53 ++++++++++++++++++++++++++++++++ target-ppc/kvm_ppc.h | 19 +++++++++++ target-ppc/mmu-hash64.c | 78 +++++++++++++++++++++++++++++++++++++++-------- target-ppc/mmu-hash64.h | 22 +++++++++---- 6 files changed, 183 insertions(+), 40 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 8ac4d8a..94cf520 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -686,6 +686,7 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) if (shift > 0) { /* Kernel handles htab, we don't need to allocate one */ spapr->htab_shift = shift; + kvmppc_kern_htab = true; } else { if (!spapr->htab) { /* Allocate an htab if we don't yet have one */ diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index d19e3fc..7493302 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -61,8 +61,9 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong ptel = args[3]; target_ulong page_shift = 12; target_ulong raddr; - target_ulong i; + target_ulong index; hwaddr hpte; + uint64_t token; /* only handle 4k and 16M pages for now */ if (pteh & HPTE64_V_LARGE) { @@ -105,30 +106,37 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr, if (!valid_pte_index(env, pte_index)) { return H_PARAMETER; } + + index = 0; + hpte = pte_index * HASH_PTE_SIZE_64; if (likely((flags & H_EXACT) == 0)) { pte_index &= ~7ULL; - hpte = pte_index * HASH_PTE_SIZE_64; - for (i = 0; ; ++i) { - if (i == 8) { + token = ppc_hash64_start_access(cpu, pte_index); + do { + if (index == 8) { + ppc_hash64_stop_access(token); return H_PTEG_FULL; } - if ((ppc_hash64_load_hpte0(env, hpte) & HPTE64_V_VALID) == 0) { + if ((ppc_hash64_load_hpte0(env, token, index) & HPTE64_V_VALID) == 0) { break; } - hpte += HASH_PTE_SIZE_64; - } + } while (index++); + ppc_hash64_stop_access(token); } else { - i = 0; - hpte = pte_index * HASH_PTE_SIZE_64; - if (ppc_hash64_load_hpte0(env, hpte) & HPTE64_V_VALID) { + token = ppc_hash64_start_access(cpu, pte_index); + if (ppc_hash64_load_hpte0(env, token, 0) & HPTE64_V_VALID) { + ppc_hash64_stop_access(token); return H_PTEG_FULL; } + ppc_hash64_stop_access(token); } + hpte += index * HASH_PTE_SIZE_64; + ppc_hash64_store_hpte1(env, hpte, ptel); /* eieio(); FIXME: need some sort of barrier for smp? */ ppc_hash64_store_hpte0(env, hpte, pteh | HPTE64_V_HPTE_DIRTY); - args[0] = pte_index + i; + args[0] = pte_index + index; return H_SUCCESS; } @@ -145,16 +153,17 @@ static RemoveResult remove_hpte(CPUPPCState *env, target_ulong ptex, target_ulong *vp, target_ulong *rp) { hwaddr hpte; + uint64_t token; target_ulong v, r, rb; if (!valid_pte_index(env, ptex)) { return REMOVE_PARM; } - hpte = ptex * HASH_PTE_SIZE_64; - - v = ppc_hash64_load_hpte0(env, hpte); - r = ppc_hash64_load_hpte1(env, hpte); + token = ppc_hash64_start_access(ppc_env_get_cpu(env), ptex); + v = ppc_hash64_load_hpte0(env, token, 0); + r = ppc_hash64_load_hpte1(env, token, 0); + ppc_hash64_stop_access(token); if ((v & HPTE64_V_VALID) == 0 || ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || @@ -163,6 +172,7 @@ static RemoveResult remove_hpte(CPUPPCState *env, target_ulong ptex, } *vp = v; *rp = r; + hpte = ptex * HASH_PTE_SIZE_64; ppc_hash64_store_hpte0(env, hpte, HPTE64_V_HPTE_DIRTY); rb = compute_tlbie_rb(v, r, ptex); ppc_tlb_invalidate_one(env, rb); @@ -271,16 +281,17 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong pte_index = args[1]; target_ulong avpn = args[2]; hwaddr hpte; + uint64_t token; target_ulong v, r, rb; if (!valid_pte_index(env, pte_index)) { return H_PARAMETER; } - hpte = pte_index * HASH_PTE_SIZE_64; - - v = ppc_hash64_load_hpte0(env, hpte); - r = ppc_hash64_load_hpte1(env, hpte); + token = ppc_hash64_start_access(cpu, pte_index); + v = ppc_hash64_load_hpte0(env, token, 0); + r = ppc_hash64_load_hpte1(env, token, 0); + ppc_hash64_stop_access(token); if ((v & HPTE64_V_VALID) == 0 || ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { @@ -293,6 +304,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr, r |= (flags << 48) & HPTE64_R_KEY_HI; r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); rb = compute_tlbie_rb(v, r, pte_index); + hpte = pte_index * HASH_PTE_SIZE_64; ppc_hash64_store_hpte0(env, hpte, (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY); ppc_tlb_invalidate_one(env, rb); ppc_hash64_store_hpte1(env, hpte, r); diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 969ebdd..e50a50e 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -1788,6 +1788,11 @@ bool kvmppc_has_cap_epr(void) return cap_epr; } +bool kvmppc_has_cap_htab_fd(void) +{ + return cap_htab_fd; +} + static int kvm_ppc_register_host_cpu_type(void) { TypeInfo type_info = { @@ -1938,3 +1943,51 @@ void kvm_arch_remove_all_hw_breakpoints(void) void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg) { } + +struct kvm_get_htab_buf { + struct kvm_get_htab_header header; + /* + * We require one extra byte for read + */ + target_ulong hpte[(HPTES_PER_GROUP * 2) + 1]; +}; + +uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index) +{ + int htab_fd; + struct kvm_get_htab_fd ghf; + struct kvm_get_htab_buf *hpte_buf; + + ghf.flags = 0; + ghf.start_index = pte_index; + htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); + if (htab_fd < 0) { + goto error_out; + } + + hpte_buf = g_malloc0(sizeof(*hpte_buf)); + /* + * Read the hpte group + */ + if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) { + goto out_close; + } + + close(htab_fd); + return (uint64_t) hpte_buf->hpte; + +out_close: + g_free(hpte_buf); + close(htab_fd); +error_out: + return 0; +} + +void kvmppc_hash64_free_pteg(uint64_t token) +{ + struct kvm_get_htab_buf *htab_buf; + + htab_buf = container_of((void *)token, struct kvm_get_htab_buf, hpte); + g_free(htab_buf); + return; +} diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h index 5f78e4b..800e1ad 100644 --- a/target-ppc/kvm_ppc.h +++ b/target-ppc/kvm_ppc.h @@ -39,10 +39,13 @@ uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift); int kvmppc_fixup_cpu(PowerPCCPU *cpu); bool kvmppc_has_cap_epr(void); int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function); +bool kvmppc_has_cap_htab_fd(void); int kvmppc_get_htab_fd(bool write); int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns); int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, uint16_t n_valid, uint16_t n_invalid); +uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index); +void kvmppc_hash64_free_pteg(uint64_t token); #else @@ -171,6 +174,11 @@ static inline int kvmppc_define_rtas_kernel_token(uint32_t token, return -1; } +static inline bool kvmppc_has_cap_htab_fd(void) +{ + return false; +} + static inline int kvmppc_get_htab_fd(bool write) { return -1; @@ -188,6 +196,17 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, abort(); } +static inline uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, + target_ulong pte_index) +{ + abort(); +} + +static inline void kvmppc_hash64_free_pteg(uint64_t token) +{ + abort(); +} + #endif #ifndef CONFIG_KVM diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c index 739dece..68a6f69 100644 --- a/target-ppc/mmu-hash64.c +++ b/target-ppc/mmu-hash64.c @@ -41,6 +41,11 @@ #endif /* + * Used to indicate whether we have allocated htab in the + * host kernel + */ +bool kvmppc_kern_htab; +/* * SLB handling */ @@ -310,29 +315,76 @@ static int ppc_hash64_amr_prot(CPUPPCState *env, ppc_hash_pte64_t pte) return prot; } -static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr pteg_off, +uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index) +{ + uint64_t token = 0; + hwaddr pte_offset; + + pte_offset = pte_index * HASH_PTE_SIZE_64; + if (kvmppc_kern_htab) { + /* + * HTAB is controlled by KVM. Fetch the PTEG into a new buffer. + */ + token = kvmppc_hash64_read_pteg(cpu, pte_index); + if (token) { + return token; + } + /* + * pteg read failed, even though we have allocated htab via + * kvmppc_reset_htab. + */ + return 0; + } + /* + * HTAB is controlled by QEMU. Just point to the internally + * accessible PTEG. + */ + if (cpu->env.external_htab) { + token = (uint64_t)(uintptr_t) cpu->env.external_htab + pte_offset; + } else if (cpu->env.htab_base) { + token = cpu->env.htab_base + pte_offset; + } + return token; +} + +void ppc_hash64_stop_access(uint64_t token) +{ + if (kvmppc_kern_htab) { + return kvmppc_hash64_free_pteg(token); + } +} + +static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash, bool secondary, target_ulong ptem, ppc_hash_pte64_t *pte) { - hwaddr pte_offset = pteg_off; - target_ulong pte0, pte1; int i; + uint64_t token; + target_ulong pte0, pte1; + target_ulong pte_index; + pte_index = (hash & env->htab_mask) * HPTES_PER_GROUP; + token = ppc_hash64_start_access(ppc_env_get_cpu(env), pte_index); + if (!token) { + return -1; + } for (i = 0; i < HPTES_PER_GROUP; i++) { - pte0 = ppc_hash64_load_hpte0(env, pte_offset); - pte1 = ppc_hash64_load_hpte1(env, pte_offset); + pte0 = ppc_hash64_load_hpte0(env, token, i); + pte1 = ppc_hash64_load_hpte1(env, token, i); if ((pte0 & HPTE64_V_VALID) && (secondary == !!(pte0 & HPTE64_V_SECONDARY)) && HPTE64_V_COMPARE(pte0, ptem)) { pte->pte0 = pte0; pte->pte1 = pte1; - return pte_offset; + ppc_hash64_stop_access(token); + return (pte_index + i) * HASH_PTE_SIZE_64; } - - pte_offset += HASH_PTE_SIZE_64; } - + ppc_hash64_stop_access(token); + /* + * We didn't find a valid entry. + */ return -1; } @@ -340,7 +392,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env, ppc_slb_t *slb, target_ulong eaddr, ppc_hash_pte64_t *pte) { - hwaddr pteg_off, pte_offset; + hwaddr pte_offset; hwaddr hash; uint64_t vsid, epnshift, epnmask, epn, ptem; @@ -375,8 +427,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env, " vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx " hash=" TARGET_FMT_plx "\n", env->htab_base, env->htab_mask, vsid, ptem, hash); - pteg_off = (hash & env->htab_mask) * HASH_PTEG_SIZE_64; - pte_offset = ppc_hash64_pteg_search(env, pteg_off, 0, ptem, pte); + pte_offset = ppc_hash64_pteg_search(env, hash, 0, ptem, pte); if (pte_offset == -1) { /* Secondary PTEG lookup */ @@ -385,8 +436,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env, " hash=" TARGET_FMT_plx "\n", env->htab_base, env->htab_mask, vsid, ptem, ~hash); - pteg_off = (~hash & env->htab_mask) * HASH_PTEG_SIZE_64; - pte_offset = ppc_hash64_pteg_search(env, pteg_off, 1, ptem, pte); + pte_offset = ppc_hash64_pteg_search(env, ~hash, 1, ptem, pte); } return pte_offset; diff --git a/target-ppc/mmu-hash64.h b/target-ppc/mmu-hash64.h index a8da558..e7cb96f 100644 --- a/target-ppc/mmu-hash64.h +++ b/target-ppc/mmu-hash64.h @@ -75,26 +75,34 @@ int ppc_hash64_handle_mmu_fault(CPUPPCState *env, target_ulong address, int rw, #define HPTE64_V_1TB_SEG 0x4000000000000000ULL #define HPTE64_V_VRMA_MASK 0x4001ffffff000000ULL + +extern bool kvmppc_kern_htab; +uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index); +void ppc_hash64_stop_access(uint64_t token); + static inline target_ulong ppc_hash64_load_hpte0(CPUPPCState *env, - hwaddr pte_offset) + uint64_t token, int index) { CPUState *cs = ENV_GET_CPU(env); + uint64_t addr; + addr = token + (index * HASH_PTE_SIZE_64); if (env->external_htab) { - return ldq_p(env->external_htab + pte_offset); + return ldq_p((const void *)(uintptr_t)addr); } else { - return ldq_phys(cs->as, env->htab_base + pte_offset); + return ldq_phys(cs->as, addr); } } static inline target_ulong ppc_hash64_load_hpte1(CPUPPCState *env, - hwaddr pte_offset) + uint64_t token, int index) { CPUState *cs = ENV_GET_CPU(env); + uint64_t addr; + addr = token + (index * HASH_PTE_SIZE_64) + HASH_PTE_SIZE_64/2; if (env->external_htab) { - return ldq_p(env->external_htab + pte_offset + HASH_PTE_SIZE_64/2); + return ldq_p((const void *)(uintptr_t)addr); } else { - return ldq_phys(cs->as, - env->htab_base + pte_offset + HASH_PTE_SIZE_64/2); + return ldq_phys(cs->as, addr); } }