Re: [Qemu-devel] [PATCH -V7 3/3] target-ppc: Fix page table lookup with kvm enabled

Alexander Graf Wed, 18 Dec 2013 01:50:41 -0800

On 07.11.2013, at 15:31, Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com> 
wrote:


> From: "Aneesh Kumar K.V" <aneesh.ku...@linux.vnet.ibm.com>
> 
> With kvm enabled, we store the hash page table information in the hypervisor.
> Use ioctl to read the htab contents. Without this we get the below error when
> trying to read the guest address
> 
> (gdb) x/10 do_fork
> 0xc000000000098660 <do_fork>:   Cannot access memory at address 
> 0xc000000000098660
> (gdb)
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com>
> ---
> Changes from V6:
> * drop htab_fd argument and use global variable kvmppc_kern_htab instead
> 
> hw/ppc/spapr.c          |  1 +
> hw/ppc/spapr_hcall.c    | 50 +++++++++++++++++++------------
> target-ppc/kvm.c        | 53 +++++++++++++++++++++++++++++++++
> target-ppc/kvm_ppc.h    | 19 ++++++++++++
> target-ppc/mmu-hash64.c | 78 ++++++++++++++++++++++++++++++++++++++++---------
> target-ppc/mmu-hash64.h | 23 ++++++++++-----
> 6 files changed, 183 insertions(+), 41 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index d4f3502..8bf886e 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -662,6 +662,7 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
>     if (shift > 0) {
>         /* Kernel handles htab, we don't need to allocate one */
>         spapr->htab_shift = shift;
> +        kvmppc_kern_htab = true;
>     } else {
>         if (!spapr->htab) {
>             /* Allocate an htab if we don't yet have one */
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index f10ba8a..f9ea691 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -50,8 +50,9 @@ static target_ulong h_enter(PowerPCCPU *cpu, 
> sPAPREnvironment *spapr,
>     target_ulong ptel = args[3];
>     target_ulong page_shift = 12;
>     target_ulong raddr;
> -    target_ulong i;
> +    target_ulong index;
>     hwaddr hpte;
> +    void *token;
> 
>     /* only handle 4k and 16M pages for now */
>     if (pteh & HPTE64_V_LARGE) {
> @@ -94,30 +95,37 @@ static target_ulong h_enter(PowerPCCPU *cpu, 
> sPAPREnvironment *spapr,
>     if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) {
>         return H_PARAMETER;
>     }
> +
> +    index = 0;
> +    hpte = pte_index * HASH_PTE_SIZE_64;
>     if (likely((flags & H_EXACT) == 0)) {
>         pte_index &= ~7ULL;
> -        hpte = pte_index * HASH_PTE_SIZE_64;
> -        for (i = 0; ; ++i) {
> -            if (i == 8) {
> +        token = ppc_hash64_start_access(cpu, pte_index);
> +        do {
> +            if (index == 8) {
> +                ppc_hash64_stop_access(token);
>                 return H_PTEG_FULL;
>             }
> -            if ((ppc_hash64_load_hpte0(env, hpte) & HPTE64_V_VALID) == 0) {
> +            if ((ppc_hash64_load_hpte0(env, token, index) & HPTE64_V_VALID) 
> == 0) {
>                 break;
>             }
> -            hpte += HASH_PTE_SIZE_64;
> -        }
> +        } while (index++);
> +        ppc_hash64_stop_access(token);
>     } else {
> -        i = 0;
> -        hpte = pte_index * HASH_PTE_SIZE_64;
> -        if (ppc_hash64_load_hpte0(env, hpte) & HPTE64_V_VALID) {
> +        token = ppc_hash64_start_access(cpu, pte_index);
> +        if (ppc_hash64_load_hpte0(env, token, 0) & HPTE64_V_VALID) {
> +            ppc_hash64_stop_access(token);
>             return H_PTEG_FULL;
>         }
> +        ppc_hash64_stop_access(token);
>     }
> +    hpte += index * HASH_PTE_SIZE_64;
> +
>     ppc_hash64_store_hpte1(env, hpte, ptel);

I'm not a big fan of fixing the read part while leaving the write part broken. 
However, I can already see value in a read-only fix, so I'm fine if the write 
part follows later.

>     /* eieio();  FIXME: need some sort of barrier for smp? */
>     ppc_hash64_store_hpte0(env, hpte, pteh | HPTE64_V_HPTE_DIRTY);
> 
> -    args[0] = pte_index + i;
> +    args[0] = pte_index + index;
>     return H_SUCCESS;
> }
> 
> @@ -134,16 +142,17 @@ static RemoveResult remove_hpte(CPUPPCState *env, 
> target_ulong ptex,
>                                 target_ulong *vp, target_ulong *rp)
> {
>     hwaddr hpte;
> +    void *token;
>     target_ulong v, r, rb;
> 
>     if ((ptex * HASH_PTE_SIZE_64) & ~env->htab_mask) {
>         return REMOVE_PARM;
>     }
> 
> -    hpte = ptex * HASH_PTE_SIZE_64;
> -
> -    v = ppc_hash64_load_hpte0(env, hpte);
> -    r = ppc_hash64_load_hpte1(env, hpte);
> +    token = ppc_hash64_start_access(ppc_env_get_cpu(env), ptex);
> +    v = ppc_hash64_load_hpte0(env, token, 0);
> +    r = ppc_hash64_load_hpte1(env, token, 0);
> +    ppc_hash64_stop_access(token);
> 
>     if ((v & HPTE64_V_VALID) == 0 ||
>         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
> @@ -152,6 +161,7 @@ static RemoveResult remove_hpte(CPUPPCState *env, 
> target_ulong ptex,
>     }
>     *vp = v;
>     *rp = r;
> +    hpte = ptex * HASH_PTE_SIZE_64;
>     ppc_hash64_store_hpte0(env, hpte, HPTE64_V_HPTE_DIRTY);
>     rb = compute_tlbie_rb(v, r, ptex);
>     ppc_tlb_invalidate_one(env, rb);
> @@ -260,16 +270,17 @@ static target_ulong h_protect(PowerPCCPU *cpu, 
> sPAPREnvironment *spapr,
>     target_ulong pte_index = args[1];
>     target_ulong avpn = args[2];
>     hwaddr hpte;
> +    void *token;
>     target_ulong v, r, rb;
> 
>     if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) {
>         return H_PARAMETER;
>     }
> 
> -    hpte = pte_index * HASH_PTE_SIZE_64;
> -
> -    v = ppc_hash64_load_hpte0(env, hpte);
> -    r = ppc_hash64_load_hpte1(env, hpte);
> +    token = ppc_hash64_start_access(cpu, pte_index);
> +    v = ppc_hash64_load_hpte0(env, token, 0);
> +    r = ppc_hash64_load_hpte1(env, token, 0);
> +    ppc_hash64_stop_access(token);
> 
>     if ((v & HPTE64_V_VALID) == 0 ||
>         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
> @@ -282,6 +293,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, 
> sPAPREnvironment *spapr,
>     r |= (flags << 48) & HPTE64_R_KEY_HI;
>     r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
>     rb = compute_tlbie_rb(v, r, pte_index);
> +    hpte = pte_index * HASH_PTE_SIZE_64;
>     ppc_hash64_store_hpte0(env, hpte, (v & ~HPTE64_V_VALID) | 
> HPTE64_V_HPTE_DIRTY);
>     ppc_tlb_invalidate_one(env, rb);
>     ppc_hash64_store_hpte1(env, hpte, r);
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index ac70efbf..b8f9544 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -1783,6 +1783,11 @@ bool kvmppc_has_cap_epr(void)
>     return cap_epr;
> }
> 
> +bool kvmppc_has_cap_htab_fd(void)
> +{
> +    return cap_htab_fd;
> +}
> +
> static int kvm_ppc_register_host_cpu_type(void)
> {
>     TypeInfo type_info = {
> @@ -1888,3 +1893,51 @@ int kvm_arch_on_sigbus(int code, void *addr)
> void kvm_arch_init_irq_routing(KVMState *s)
> {
> }
> +
> +struct kvm_get_htab_buf {
> +    struct kvm_get_htab_header header;
> +    /*
> +     * We required one extra byte for read
> +     */
> +    unsigned long hpte[(HPTES_PER_GROUP * 2) + 1];
> +};
> +
> +void *kvmppc_hash64_read_pteg(PowerPCCPU *cpu, unsigned long pte_index)
> +{
> +    int htab_fd;
> +    struct kvm_get_htab_fd ghf;
> +    struct kvm_get_htab_buf  *hpte_buf;
> +
> +    ghf.flags = 0;
> +    ghf.start_index = pte_index;
> +    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
> +    if (htab_fd < 0) {
> +        goto error_out;
> +    }
> +
> +    hpte_buf = g_malloc0(sizeof(*hpte_buf));
> +    /*
> +     * Read the hpte group
> +     */
> +    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
> +        goto out_close;
> +    }
> +
> +    close(htab_fd);
> +    return hpte_buf->hpte;
> +
> +out_close:
> +    g_free(hpte_buf);
> +    close(htab_fd);
> +error_out:
> +    return NULL;
> +}
> +
> +void kvmppc_hash64_free_pteg(void *token)
> +{
> +    struct kvm_get_htab_buf *htab_buf;
> +
> +    htab_buf = container_of(token, struct kvm_get_htab_buf, hpte);
> +    g_free(htab_buf);
> +    return;
> +}
> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
> index 4ae7bf2..9981e34 100644
> --- a/target-ppc/kvm_ppc.h
> +++ b/target-ppc/kvm_ppc.h
> @@ -38,10 +38,13 @@ uint64_t kvmppc_rma_size(uint64_t current_size, unsigned 
> int hash_shift);
> #endif /* !CONFIG_USER_ONLY */
> int kvmppc_fixup_cpu(PowerPCCPU *cpu);
> bool kvmppc_has_cap_epr(void);
> +bool kvmppc_has_cap_htab_fd(void);
> int kvmppc_get_htab_fd(bool write);
> int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
> int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
>                            uint16_t n_valid, uint16_t n_invalid);
> +void *kvmppc_hash64_read_pteg(PowerPCCPU *cpu, unsigned long pte_index);
> +void kvmppc_hash64_free_pteg(void *token);
> 
> #else
> 
> @@ -164,6 +167,11 @@ static inline bool kvmppc_has_cap_epr(void)
>     return false;
> }
> 
> +static inline bool kvmppc_has_cap_htab_fd(void)
> +{
> +    return false;
> +}
> +
> static inline int kvmppc_get_htab_fd(bool write)
> {
>     return -1;
> @@ -181,6 +189,17 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, 
> int fd, uint32_t index,
>     abort();
> }
> 
> +static inline void *kvmppc_hash64_read_pteg(PowerPCCPU *cpu,
> +                                            unsigned long pte_index)
> +{
> +    abort();
> +}
> +
> +static inline void kvmppc_hash64_free_pteg(void *token)
> +{
> +    abort();
> +}
> +
> #endif
> 
> #ifndef CONFIG_KVM
> diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
> index 67fc1b5..f59d199 100644
> --- a/target-ppc/mmu-hash64.c
> +++ b/target-ppc/mmu-hash64.c
> @@ -41,6 +41,11 @@
> #endif
> 
> /*
> + * Used to indicate whether we have allocated htab in the
> + * host kernel
> + */
> +bool kvmppc_kern_htab;
> +/*
>  * SLB handling
>  */
> 
> @@ -302,29 +307,76 @@ static int ppc_hash64_amr_prot(CPUPPCState *env, 
> ppc_hash_pte64_t pte)
>     return prot;
> }
> 
> -static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr pteg_off,
> +void *ppc_hash64_start_access(PowerPCCPU *cpu, unsigned long pte_index)
> +{
> +    void *token = NULL;
> +    hwaddr pte_offset;
> +
> +    pte_offset = pte_index * HASH_PTE_SIZE_64;
> +    if (kvmppc_kern_htab) {
> +        /*
> +         * HTAB is controlled by KVM. Fetch the PTEG into a new buffer.
> +         */
> +        token = kvmppc_hash64_read_pteg(cpu, pte_index);
> +        if (token) {
> +            return token;
> +        }
> +        /*
> +         * pteg read failed, even though we have allocated htab via
> +         * kvmppc_reset_htab.
> +         */
> +        return NULL;
> +    }
> +    /*
> +     * HTAB is controlled by QEMU. Just point to the internally
> +     * accessible PTEG.
> +     */
> +    if (cpu->env.external_htab) {
> +        token = cpu->env.external_htab + pte_offset;
> +    } else if (cpu->env.htab_base) {
> +        token = (uint8_t *) cpu->env.htab_base + pte_offset;

This breaks if you run a 64-bit guest on a 32-bit host and try to access memory 
beyond 4GB. In that case htab_base is a hwaddr (64 bits wide), while a 
uint8_t * host pointer is only 32 bits wide, so the address gets truncated.

Just pass a 64-bit token around instead of a pointer. That makes it safe and easy.


Alex

> +    }
> +    return token;
> +}
> +
> +void ppc_hash64_stop_access(void *token)
> +{
> +    if (kvmppc_kern_htab) {
> +        return kvmppc_hash64_free_pteg(token);
> +    }
> +}
> +
> +static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash,
>                                      bool secondary, target_ulong ptem,
>                                      ppc_hash_pte64_t *pte)
> {
> -    hwaddr pte_offset = pteg_off;
> -    target_ulong pte0, pte1;
>     int i;
> +    void *token;
> +    target_ulong pte0, pte1;
> +    unsigned long pte_index;
> 
> +    pte_index = (hash & env->htab_mask) * HPTES_PER_GROUP;
> +    token = ppc_hash64_start_access(ppc_env_get_cpu(env), pte_index);
> +    if (!token) {
> +        return -1;
> +    }
>     for (i = 0; i < HPTES_PER_GROUP; i++) {
> -        pte0 = ppc_hash64_load_hpte0(env, pte_offset);
> -        pte1 = ppc_hash64_load_hpte1(env, pte_offset);
> +        pte0 = ppc_hash64_load_hpte0(env, token, i);
> +        pte1 = ppc_hash64_load_hpte1(env, token, i);
> 
>         if ((pte0 & HPTE64_V_VALID)
>             && (secondary == !!(pte0 & HPTE64_V_SECONDARY))
>             && HPTE64_V_COMPARE(pte0, ptem)) {
>             pte->pte0 = pte0;
>             pte->pte1 = pte1;
> -            return pte_offset;
> +            ppc_hash64_stop_access(token);
> +            return (pte_index + i) * HASH_PTE_SIZE_64;
>         }
> -
> -        pte_offset += HASH_PTE_SIZE_64;
>     }
> -
> +    ppc_hash64_stop_access(token);
> +    /*
> +     * We didn't find a valid entry.
> +     */
>     return -1;
> }
> 
> @@ -332,7 +384,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
>                                      ppc_slb_t *slb, target_ulong eaddr,
>                                      ppc_hash_pte64_t *pte)
> {
> -    hwaddr pteg_off, pte_offset;
> +    hwaddr pte_offset;
>     hwaddr hash;
>     uint64_t vsid, epnshift, epnmask, epn, ptem;
> 
> @@ -367,8 +419,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
>             " vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx
>             " hash=" TARGET_FMT_plx "\n",
>             env->htab_base, env->htab_mask, vsid, ptem,  hash);
> -    pteg_off = (hash * HASH_PTEG_SIZE_64) & env->htab_mask;
> -    pte_offset = ppc_hash64_pteg_search(env, pteg_off, 0, ptem, pte);
> +    pte_offset = ppc_hash64_pteg_search(env, hash, 0, ptem, pte);
> 
>     if (pte_offset == -1) {
>         /* Secondary PTEG lookup */
> @@ -377,8 +428,7 @@ static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env,
>                 " hash=" TARGET_FMT_plx "\n", env->htab_base,
>                 env->htab_mask, vsid, ptem, ~hash);
> 
> -        pteg_off = (~hash * HASH_PTEG_SIZE_64) & env->htab_mask;
> -        pte_offset = ppc_hash64_pteg_search(env, pteg_off, 1, ptem, pte);
> +        pte_offset = ppc_hash64_pteg_search(env, ~hash, 1, ptem, pte);
>     }
> 
>     return pte_offset;
> diff --git a/target-ppc/mmu-hash64.h b/target-ppc/mmu-hash64.h
> index 55f5a23..c6f64f6 100644
> --- a/target-ppc/mmu-hash64.h
> +++ b/target-ppc/mmu-hash64.h
> @@ -75,23 +75,30 @@ int ppc_hash64_handle_mmu_fault(CPUPPCState *env, 
> target_ulong address, int rw,
> #define HPTE64_V_1TB_SEG        0x4000000000000000ULL
> #define HPTE64_V_VRMA_MASK      0x4001ffffff000000ULL
> 
> -static inline target_ulong ppc_hash64_load_hpte0(CPUPPCState *env,
> -                                                 hwaddr pte_offset)
> +
> +extern bool kvmppc_kern_htab;
> +void *ppc_hash64_start_access(PowerPCCPU *cpu, unsigned long pte_index);
> +void ppc_hash64_stop_access(void *token);
> +
> +static inline target_ulong ppc_hash64_load_hpte0(CPUPPCState *env, void 
> *token,
> +                                                 int index)
> {
> +    index *= HASH_PTE_SIZE_64;
>     if (env->external_htab) {
> -        return  ldq_p(env->external_htab + pte_offset);
> +        return  ldq_p(token + index);
>     } else {
> -        return ldq_phys(env->htab_base + pte_offset);
> +        return ldq_phys((uint64_t)(token + index));
>     }
> }
> 
> -static inline target_ulong ppc_hash64_load_hpte1(CPUPPCState *env,
> -                                                 hwaddr pte_offset)
> +static inline target_ulong ppc_hash64_load_hpte1(CPUPPCState *env, void 
> *token,
> +                                                 int index)
> {
> +    index *= HASH_PTE_SIZE_64;
>     if (env->external_htab) {
> -        return ldq_p(env->external_htab + pte_offset + HASH_PTE_SIZE_64/2);
> +        return  ldq_p(token + index + HASH_PTE_SIZE_64/2);
>     } else {
> -        return ldq_phys(env->htab_base + pte_offset + HASH_PTE_SIZE_64/2);
> +        return ldq_phys((uint64_t)(token + index + HASH_PTE_SIZE_64/2));
>     }
> }
> 
> -- 
> 1.8.3.2
>

Re: [Qemu-devel] [PATCH -V7 3/3] target-ppc: Fix page table lookup with kvm enabled

Reply via email to