On 07.11.2013, at 15:31, Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com> wrote:
> From: "Aneesh Kumar K.V" <aneesh.ku...@linux.vnet.ibm.com> > > With kvm enabled, we store the hash page table information in the hypervisor. > Use ioctl to read the htab contents. Without this we get the below error when > trying to read the guest address > > (gdb) x/10 do_fork > 0xc000000000098660 <do_fork>: Cannot access memory at address > 0xc000000000098660 > (gdb) > > Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com> > --- > Changes from V6: > * drop htab_fd argument and use global variable kvmppc_kern_htab instead > > hw/ppc/spapr.c | 1 + > hw/ppc/spapr_hcall.c | 50 +++++++++++++++++++------------ > target-ppc/kvm.c | 53 +++++++++++++++++++++++++++++++++ > target-ppc/kvm_ppc.h | 19 ++++++++++++ > target-ppc/mmu-hash64.c | 78 ++++++++++++++++++++++++++++++++++++++++--------- > target-ppc/mmu-hash64.h | 23 ++++++++++----- > 6 files changed, 183 insertions(+), 41 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index d4f3502..8bf886e 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -662,6 +662,7 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) > if (shift > 0) { > /* Kernel handles htab, we don't need to allocate one */ > spapr->htab_shift = shift; > + kvmppc_kern_htab = true; > } else { > if (!spapr->htab) { > /* Allocate an htab if we don't yet have one */ > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c > index f10ba8a..f9ea691 100644 > --- a/hw/ppc/spapr_hcall.c > +++ b/hw/ppc/spapr_hcall.c > @@ -50,8 +50,9 @@ static target_ulong h_enter(PowerPCCPU *cpu, > sPAPREnvironment *spapr, > target_ulong ptel = args[3]; > target_ulong page_shift = 12; > target_ulong raddr; > - target_ulong i; > + target_ulong index; > hwaddr hpte; > + void *token; > > /* only handle 4k and 16M pages for now */ > if (pteh & HPTE64_V_LARGE) { > @@ -94,30 +95,37 @@ static target_ulong h_enter(PowerPCCPU *cpu, > sPAPREnvironment *spapr, > if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) { > return H_PARAMETER; > } > + > + index = 0; > + hpte = pte_index * HASH_PTE_SIZE_64; > if (likely((flags & H_EXACT) == 0)) { > pte_index &= ~7ULL; > - hpte = pte_index * HASH_PTE_SIZE_64; > - for (i = 0; ; ++i) { > - if (i == 8) { > + token = ppc_hash64_start_access(cpu, pte_index); > + do { > + if (index == 8) { > + ppc_hash64_stop_access(token); > return H_PTEG_FULL; > } > - if ((ppc_hash64_load_hpte0(env, hpte) & HPTE64_V_VALID) == 0) { > + if ((ppc_hash64_load_hpte0(env, token, index) & HPTE64_V_VALID) > == 0) { > break; > } > - hpte += HASH_PTE_SIZE_64; > - } > + } while (index++); > + ppc_hash64_stop_access(token); > } else { > - i = 0; > - hpte = pte_index * HASH_PTE_SIZE_64; > - if (ppc_hash64_load_hpte0(env, hpte) & HPTE64_V_VALID) { > + token = ppc_hash64_start_access(cpu, pte_index); > + if (ppc_hash64_load_hpte0(env, token, 0) & HPTE64_V_VALID) { > + ppc_hash64_stop_access(token); > return H_PTEG_FULL; > } > + ppc_hash64_stop_access(token); > } > + hpte += index * HASH_PTE_SIZE_64; > + > ppc_hash64_store_hpte1(env, hpte, ptel); I'm not a big fan of fixing the read part, but leaving the write part broken. However I can see value in read only already, so I'm fine if the write part follows later. > /* eieio(); FIXME: need some sort of barrier for smp? */ > ppc_hash64_store_hpte0(env, hpte, pteh | HPTE64_V_HPTE_DIRTY); > > - args[0] = pte_index + i; > + args[0] = pte_index + index; > return H_SUCCESS; > } > > @@ -134,16 +142,17 @@ static RemoveResult remove_hpte(CPUPPCState *env, > target_ulong ptex, > target_ulong *vp, target_ulong *rp) > { > hwaddr hpte; > + void *token; > target_ulong v, r, rb; > > if ((ptex * HASH_PTE_SIZE_64) & ~env->htab_mask) { > return REMOVE_PARM; > } > > - hpte = ptex * HASH_PTE_SIZE_64; > - > - v = ppc_hash64_load_hpte0(env, hpte); > - r = ppc_hash64_load_hpte1(env, hpte); > + token = ppc_hash64_start_access(ppc_env_get_cpu(env), ptex); > + v = ppc_hash64_load_hpte0(env, token, 0); > + r = ppc_hash64_load_hpte1(env, token, 0); > + ppc_hash64_stop_access(token); > > if ((v & HPTE64_V_VALID) == 0 || > ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || > @@ -152,6 +161,7 @@ static RemoveResult remove_hpte(CPUPPCState *env, > target_ulong ptex, > } > *vp = v; > *rp = r; > + hpte = ptex * HASH_PTE_SIZE_64; > ppc_hash64_store_hpte0(env, hpte, HPTE64_V_HPTE_DIRTY); > rb = compute_tlbie_rb(v, r, ptex); > ppc_tlb_invalidate_one(env, rb); > @@ -260,16 +270,17 @@ static target_ulong h_protect(PowerPCCPU *cpu, > sPAPREnvironment *spapr, > target_ulong pte_index = args[1]; > target_ulong avpn = args[2]; > hwaddr hpte; > + void *token; > target_ulong v, r, rb; > > if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) { > return H_PARAMETER; > } > > - hpte = pte_index * HASH_PTE_SIZE_64; > - > - v = ppc_hash64_load_hpte0(env, hpte); > - r = ppc_hash64_load_hpte1(env, hpte); > + token = ppc_hash64_start_access(cpu, pte_index); > + v = ppc_hash64_load_hpte0(env, token, 0); > + r = ppc_hash64_load_hpte1(env, token, 0); > + ppc_hash64_stop_access(token); > > if ((v & HPTE64_V_VALID) == 0 || > ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { > @@ -282,6 +293,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, > sPAPREnvironment *spapr, > r |= (flags << 48) & HPTE64_R_KEY_HI; > r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); > rb = compute_tlbie_rb(v, r, pte_index); > + hpte = pte_index * HASH_PTE_SIZE_64; > ppc_hash64_store_hpte0(env, hpte, (v & ~HPTE64_V_VALID) | > HPTE64_V_HPTE_DIRTY); > ppc_tlb_invalidate_one(env, rb); > ppc_hash64_store_hpte1(env, hpte, r); > diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c > index ac70efbf..b8f9544 100644 > --- a/target-ppc/kvm.c > +++ b/target-ppc/kvm.c > @@ -1783,6 +1783,11 @@ bool kvmppc_has_cap_epr(void) > return cap_epr; > } > > +bool kvmppc_has_cap_htab_fd(void) > +{ > + return cap_htab_fd; > +} > + > static int kvm_ppc_register_host_cpu_type(void) > { > TypeInfo type_info = { > @@ -1888,3 +1893,51 @@ int kvm_arch_on_sigbus(int code, void *addr) > void kvm_arch_init_irq_routing(KVMState *s) > { > } > + > +struct kvm_get_htab_buf { > + struct kvm_get_htab_header header; > + /* > + * We required one extra byte for read > + */ > + unsigned long hpte[(HPTES_PER_GROUP * 2) + 1]; > +}; > + > +void *kvmppc_hash64_read_pteg(PowerPCCPU *cpu, unsigned long pte_index) > +{ > + int htab_fd; > + struct kvm_get_htab_fd ghf; > + struct kvm_get_htab_buf *hpte_buf; > + > + ghf.flags = 0; > + ghf.start_index = pte_index; > + htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); > + if (htab_fd < 0) { > + goto error_out; > + } > + > + hpte_buf = g_malloc0(sizeof(*hpte_buf)); > + /* > + * Read the hpte group > + */ > + if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) { > + goto out_close; > + } > + > + close(htab_fd); > + return hpte_buf->hpte; > + > +out_close: > + g_free(hpte_buf); > + close(htab_fd); > +error_out: > + return NULL; > +} > + > +void kvmppc_hash64_free_pteg(void *token) > +{ > + struct kvm_get_htab_buf *htab_buf; > + > + htab_buf = container_of(token, struct kvm_get_htab_buf, hpte); > + g_free(htab_buf); > + return; > +} > diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h > index 4ae7bf2..9981e34 100644 > --- a/target-ppc/kvm_ppc.h > +++ b/target-ppc/kvm_ppc.h > @@ -38,10 +38,13 @@ uint64_t kvmppc_rma_size(uint64_t current_size, unsigned > int hash_shift); > #endif /* !CONFIG_USER_ONLY */ > int kvmppc_fixup_cpu(PowerPCCPU *cpu); > bool kvmppc_has_cap_epr(void); > +bool kvmppc_has_cap_htab_fd(void); > int kvmppc_get_htab_fd(bool write); > int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns); > int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, > uint16_t n_valid, uint16_t n_invalid); > +void *kvmppc_hash64_read_pteg(PowerPCCPU *cpu, unsigned long pte_index); > +void kvmppc_hash64_free_pteg(void *token); > > #else > > @@ -164,6 +167,11 @@ static inline bool kvmppc_has_cap_epr(void) > return false; > } > > +static inline bool kvmppc_has_cap_htab_fd(void) > +{ > + return false; > +} > + > static inline int kvmppc_get_htab_fd(bool write) > { > return -1; > @@ -181,6 +189,17 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, > int fd, uint32_t index, > abort(); > } > > +static inline void *kvmppc_hash64_read_pteg(PowerPCCPU *cpu, > + unsigned long pte_index) > +{ > + abort(); > +} > + > +static inline void kvmppc_hash64_free_pteg(void *token) > +{ > + abort(); > +} > + > #endif > > #ifndef CONFIG_KVM > diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c > index 67fc1b5..f59d199 100644 > --- a/target-ppc/mmu-hash64.c > +++ b/target-ppc/mmu-hash64.c > @@ -41,6 +41,11 @@ > #endif > > /* > + * Used to indicate whether we have allocated htab in the > + * host kernel > + */ > +bool kvmppc_kern_htab; > +/* > * SLB handling > */ > > @@ -302,29 +307,76 @@ static int ppc_hash64_amr_prot(CPUPPCState *env, > ppc_hash_pte64_t pte) > return prot; > } > > -static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr pteg_off, > +void *ppc_hash64_start_access(PowerPCCPU *cpu, unsigned long pte_index) > +{ > + void *token = NULL; > + hwaddr pte_offset; > + > + pte_offset = pte_index * HASH_PTE_SIZE_64; > + if (kvmppc_kern_htab) { > + /* > + * HTAB is controlled by KVM. Fetch the PTEG into a new buffer. > + */ > + token = kvmppc_hash64_read_pteg(cpu, pte_index); > + if (token) { > + return token; > + } > + /* > + * pteg read failed, even though we have allocated htab via > + * kvmppc_reset_htab. > + */ > + return NULL; > + } > + /* > + * HTAB is controlled by QEMU. Just point to the internally > + * accessible PTEG. > + */ > + if (cpu->env.external_htab) { > + token = cpu->env.external_htab + pte_offset; > + } else if (cpu->env.htab_base) { > + token = (uint8_t *) cpu->env.htab_base + pte_offset; This breaks if you run a 64-bit guest on a 32-bit host trying to access memory beyond 4GB. In that case htab_base is hwaddr (64bit) while uint8_t is only 32bit wide. Just pass a 64bit token around. That makes it safe and easy. Alex > + } > + return token; > +} > + > +void ppc_hash64_stop_access(void *token) > +{ > + if (kvmppc_kern_htab) { > + return kvmppc_hash64_free_pteg(token); > + } > +} > + > +static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash, > bool secondary, target_ulong ptem, > ppc_hash_pte64_t *pte) > { > - hwaddr pte_offset = pteg_off; > - target_ulong pte0, pte1; > int i; > + void *token; > + target_ulong pte0, pte1; > + unsigned long pte_index; > > + pte_index = (hash & env->htab_mask) * HPTES_PER_GROUP; > + token = ppc_hash64_start_access(ppc_env_get_cpu(env), pte_index); > + if (!token) { > + return -1; > + } > for (i = 0; i < HPTES_PER_GROUP; i++) { > - pte0 = ppc_hash64_load_hpte0(env, pte_offset); > - pte1 = ppc_hash64_load_hpte1(env, pte_offset); > + pte0 = ppc_hash64_load_hpte0(env, token, i); > + pte1 = ppc_hash64_load_hpte1(env, token, i); > > if ((pte0 & HPTE64_V_VALID) > && (secondary == !!(pte0 & HPTE64_V_SECONDARY)) > && HPTE64_V_COMPARE(pte0, ptem)) { > pte->pte0 = pte0; > pte->pte1 = pte1; > - return pte_offset; > + ppc_hash64_stop_access(token); > + return (pte_index + i) * HASH_PTE_SIZE_64; > } > - > - pte_offset += HASH_PTE_SIZE_64; > } > - > + ppc_hash64_stop_access(token); > + /* > + * We didn't find a valid entry. > + */ > return -1; > } > > @@ -332,7 +384,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env, > ppc_slb_t *slb, target_ulong eaddr, > ppc_hash_pte64_t *pte) > { > - hwaddr pteg_off, pte_offset; > + hwaddr pte_offset; > hwaddr hash; > uint64_t vsid, epnshift, epnmask, epn, ptem; > > @@ -367,8 +419,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env, > " vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx > " hash=" TARGET_FMT_plx "\n", > env->htab_base, env->htab_mask, vsid, ptem, hash); > - pteg_off = (hash * HASH_PTEG_SIZE_64) & env->htab_mask; > - pte_offset = ppc_hash64_pteg_search(env, pteg_off, 0, ptem, pte); > + pte_offset = ppc_hash64_pteg_search(env, hash, 0, ptem, pte); > > if (pte_offset == -1) { > /* Secondary PTEG lookup */ > @@ -377,8 +428,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env, > " hash=" TARGET_FMT_plx "\n", env->htab_base, > env->htab_mask, vsid, ptem, ~hash); > > - pteg_off = (~hash * HASH_PTEG_SIZE_64) & env->htab_mask; > - pte_offset = ppc_hash64_pteg_search(env, pteg_off, 1, ptem, pte); > + pte_offset = ppc_hash64_pteg_search(env, ~hash, 1, ptem, pte); > } > > return pte_offset; > diff --git a/target-ppc/mmu-hash64.h b/target-ppc/mmu-hash64.h > index 55f5a23..c6f64f6 100644 > --- a/target-ppc/mmu-hash64.h > +++ b/target-ppc/mmu-hash64.h > @@ -75,23 +75,30 @@ int ppc_hash64_handle_mmu_fault(CPUPPCState *env, > target_ulong address, int rw, > #define HPTE64_V_1TB_SEG 0x4000000000000000ULL > #define HPTE64_V_VRMA_MASK 0x4001ffffff000000ULL > > -static inline target_ulong ppc_hash64_load_hpte0(CPUPPCState *env, > - hwaddr pte_offset) > + > +extern bool kvmppc_kern_htab; > +void *ppc_hash64_start_access(PowerPCCPU *cpu, unsigned long pte_index); > +void ppc_hash64_stop_access(void *token); > + > +static inline target_ulong ppc_hash64_load_hpte0(CPUPPCState *env, void > *token, > + int index) > { > + index *= HASH_PTE_SIZE_64; > if (env->external_htab) { > - return ldq_p(env->external_htab + pte_offset); > + return ldq_p(token + index); > } else { > - return ldq_phys(env->htab_base + pte_offset); > + return ldq_phys((uint64_t)(token + index)); > } > } > > -static inline target_ulong ppc_hash64_load_hpte1(CPUPPCState *env, > - hwaddr pte_offset) > +static inline target_ulong ppc_hash64_load_hpte1(CPUPPCState *env, void > *token, > + int index) > { > + index *= HASH_PTE_SIZE_64; > if (env->external_htab) { > - return ldq_p(env->external_htab + pte_offset + HASH_PTE_SIZE_64/2); > + return ldq_p(token + index + HASH_PTE_SIZE_64/2); > } else { > - return ldq_phys(env->htab_base + pte_offset + HASH_PTE_SIZE_64/2); > + return ldq_phys((uint64_t)(token + index + HASH_PTE_SIZE_64/2)); > } > } > > -- > 1.8.3.2 >