On Mon, Mar 07, 2016 at 02:41:14PM +1100, Alexey Kardashevskiy wrote:
> The existing in-kernel TCE table for emulated devices contains
> guest physical addresses which are accesses by emulated devices.
> Since we need to keep this information for VFIO devices too
> in order to implement H_GET_TCE, we are reusing it.
> 
> This adds IOMMU group list to kvmppc_spapr_tce_table. Each group
> will have an iommu_table pointer.
> 
> This adds kvm_spapr_tce_attach_iommu_group() helper and its detach
> counterpart to manage the lists.
> 
> This puts a group when:
> - guest copy of TCE table is destroyed when TCE table fd is closed;
> - kvm_spapr_tce_detach_iommu_group() is called from
> the KVM_DEV_VFIO_GROUP_DEL ioctl handler in the case vfio-pci hotunplug
> (will be added in the following patch).
> 
> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>
> ---
>  arch/powerpc/include/asm/kvm_host.h |   8 +++
>  arch/powerpc/include/asm/kvm_ppc.h  |   6 ++
>  arch/powerpc/kvm/book3s_64_vio.c    | 108 
> ++++++++++++++++++++++++++++++++++++
>  3 files changed, 122 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index 2e7c791..2c5c823 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -178,6 +178,13 @@ struct kvmppc_pginfo {
>       atomic_t refcnt;
>  };
>  
> +struct kvmppc_spapr_tce_group {
> +     struct list_head next;
> +     struct rcu_head rcu;
> +     struct iommu_group *refgrp;/* for reference counting only */
> +     struct iommu_table *tbl;
> +};
> +
>  struct kvmppc_spapr_tce_table {
>       struct list_head list;
>       struct kvm *kvm;
> @@ -186,6 +193,7 @@ struct kvmppc_spapr_tce_table {
>       u32 page_shift;
>       u64 offset;             /* in pages */
>       u64 size;               /* window size in pages */
> +     struct list_head groups;
>       struct page *pages[0];
>  };
>  
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
> b/arch/powerpc/include/asm/kvm_ppc.h
> index 2544eda..d1482dc 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -164,6 +164,12 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
>                       struct kvm_memory_slot *memslot, unsigned long porder);
>  extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
>  
> +extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm,
> +                             unsigned long liobn,
> +                             phys_addr_t start_addr,
> +                             struct iommu_group *grp);
> +extern void kvm_spapr_tce_detach_iommu_group(struct kvm *kvm,
> +                             struct iommu_group *grp);
>  extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>                               struct kvm_create_spapr_tce_64 *args);
>  extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
> diff --git a/arch/powerpc/kvm/book3s_64_vio.c 
> b/arch/powerpc/kvm/book3s_64_vio.c
> index 2c2d103..846d16d 100644
> --- a/arch/powerpc/kvm/book3s_64_vio.c
> +++ b/arch/powerpc/kvm/book3s_64_vio.c
> @@ -27,6 +27,7 @@
>  #include <linux/hugetlb.h>
>  #include <linux/list.h>
>  #include <linux/anon_inodes.h>
> +#include <linux/iommu.h>
>  
>  #include <asm/tlbflush.h>
>  #include <asm/kvm_ppc.h>
> @@ -95,10 +96,18 @@ static void release_spapr_tce_table(struct rcu_head *head)
>       struct kvmppc_spapr_tce_table *stt = container_of(head,
>                       struct kvmppc_spapr_tce_table, rcu);
>       unsigned long i, npages = kvmppc_tce_pages(stt->size);
> +     struct kvmppc_spapr_tce_group *kg;
>  
>       for (i = 0; i < npages; i++)
>               __free_page(stt->pages[i]);
>  
> +     while (!list_empty(&stt->groups)) {
> +             kg = list_first_entry(&stt->groups,
> +                             struct kvmppc_spapr_tce_group, next);
> +             list_del(&kg->next);
> +             kfree(kg);
> +     }
> +
>       kfree(stt);
>  }
>  
> @@ -129,9 +138,15 @@ static int kvm_spapr_tce_mmap(struct file *file, struct 
> vm_area_struct *vma)
>  static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
>  {
>       struct kvmppc_spapr_tce_table *stt = filp->private_data;
> +     struct kvmppc_spapr_tce_group *kg;
>  
>       list_del_rcu(&stt->list);
>  
> +     list_for_each_entry_rcu(kg, &stt->groups, next) {
> +             iommu_group_put(kg->refgrp);
> +             kg->refgrp = NULL;
> +     }

What's the reason for this kind of two-phase deletion?  Dropping the
group reference here and setting it to NULL, then actually removing the
entry from the list in release_spapr_tce_table() above.


>       kvm_put_kvm(stt->kvm);
>  
>       kvmppc_account_memlimit(
> @@ -146,6 +161,98 @@ static const struct file_operations kvm_spapr_tce_fops = 
> {
>       .release        = kvm_spapr_tce_release,
>  };
>  
> +extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm,
> +                             unsigned long liobn,
> +                             phys_addr_t start_addr,
> +                             struct iommu_group *grp)
> +{
> +     struct kvmppc_spapr_tce_table *stt = NULL;
> +     struct iommu_table_group *table_group;
> +     long i;
> +     bool found = false;
> +     struct kvmppc_spapr_tce_group *kg;
> +     struct iommu_table *tbltmp;
> +
> +     /* Check this LIOBN hasn't been previously allocated */

This comment does not appear to be correct: the loop below looks up an
existing table for this LIOBN and fails with -ENODEV if none is found.

> +     list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
> +             if (stt->liobn == liobn) {
> +                     if ((stt->offset << stt->page_shift) != start_addr)
> +                             return -EINVAL;
> +
> +                     found = true;
> +                     break;
> +             }
> +     }
> +
> +     if (!found)
> +             return -ENODEV;
> +
> +     /* Find IOMMU group and table at @start_addr */
> +     table_group = iommu_group_get_iommudata(grp);
> +     if (!table_group)
> +             return -EFAULT;
> +
> +     tbltmp = NULL;
> +     for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
> +             struct iommu_table *tbl = table_group->tables[i];
> +
> +             if (!tbl)
> +                     continue;
> +
> +             if ((tbl->it_page_shift == stt->page_shift) &&
> +                             (tbl->it_offset == stt->offset)) {
> +                     tbltmp = tbl;
> +                     break;
> +             }
> +     }
> +     if (!tbltmp)
> +             return -ENODEV;
> +
> +     list_for_each_entry_rcu(kg, &stt->groups, next) {
> +             if (kg->refgrp == grp)
> +                     return -EBUSY;
> +     }
> +
> +     kg = kzalloc(sizeof(*kg), GFP_KERNEL);
> +     kg->refgrp = grp;
> +     kg->tbl = tbltmp;
> +     list_add_rcu(&kg->next, &stt->groups);
> +
> +     return 0;
> +}
> +
> +static void kvm_spapr_tce_put_group(struct rcu_head *head)
> +{
> +     struct kvmppc_spapr_tce_group *kg = container_of(head,
> +                     struct kvmppc_spapr_tce_group, rcu);
> +
> +     iommu_group_put(kg->refgrp);
> +     kg->refgrp = NULL;
> +     kfree(kg);
> +}
> +
> +extern void kvm_spapr_tce_detach_iommu_group(struct kvm *kvm,
> +                             struct iommu_group *grp)

Hrm.  attach takes an explicit liobn, but this one iterates over all
liobns.  Why the asymmetry?

> +{
> +     struct kvmppc_spapr_tce_table *stt;
> +     struct iommu_table_group *table_group;
> +     struct kvmppc_spapr_tce_group *kg;
> +
> +     table_group = iommu_group_get_iommudata(grp);
> +     if (!table_group)
> +             return;
> +
> +     list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
> +             list_for_each_entry_rcu(kg, &stt->groups, next) {
> +                     if (kg->refgrp == grp) {
> +                             list_del_rcu(&kg->next);
> +                             call_rcu(&kg->rcu, kvm_spapr_tce_put_group);
> +                             break;
> +                     }
> +             }
> +     }
> +}
> +
>  long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>                                  struct kvm_create_spapr_tce_64 *args)
>  {
> @@ -181,6 +288,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>       stt->offset = args->offset;
>       stt->size = size;
>       stt->kvm = kvm;
> +     INIT_LIST_HEAD_RCU(&stt->groups);
>  
>       for (i = 0; i < npages; i++) {
>               stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to